From 66ecf82ffc282755eca89b4ba1ce5ae211aa3f39 Mon Sep 17 00:00:00 2001 From: Aron Xu Date: Thu, 20 Nov 2014 17:03:19 +0800 Subject: [PATCH] Imported Upstream version 1.0.2 --- .cproject | 68 - .gitignore | 12 + .idea/codeStyleSettings.xml | 63 + .npmignore | 12 +- .project | 26 - .travis.yml | 11 + CMakeLists.txt | 118 +- Makefile | 68 + NEWS.md | 50 +- README.md | 177 +- binding.gyp | 30 +- data/CMakeLists.txt | 209 +- data/cn/cn-it.txt | 338 --- data/cn/cn-name.txt | 81 - data/cn/cn-other.txt | 10 - data/cn/merge.sh | 1 - data/cn/to_cn_phrases.txt | 429 ---- data/config/hk2s.json | 33 + data/config/mix2zhs.ini | 21 - data/config/mix2zht.ini | 21 - data/config/s2hk.json | 33 + data/config/s2t.json | 22 + data/config/s2tw.json | 27 + data/config/s2twp.json | 32 + data/config/t2s.json | 22 + data/config/tw2s.json | 33 + data/config/tw2sp.json | 38 + data/config/zhs2zht.ini | 20 - data/config/zhs2zhtw_p.ini | 21 - data/config/zhs2zhtw_v.ini | 21 - data/config/zhs2zhtw_vp.ini | 22 - data/config/zht2zhs.ini | 20 - data/config/zht2zhtw_p.ini | 19 - data/config/zht2zhtw_v.ini | 19 - data/config/zht2zhtw_vp.ini | 20 - data/config/zhtw2zhcn_s.ini | 23 - data/config/zhtw2zhcn_t.ini | 21 - data/config/zhtw2zhs.ini | 22 - data/config/zhtw2zht.ini | 20 - data/dictionary/HKVariants.txt | 62 + data/dictionary/HKVariantsPhrases.txt | 17 + data/dictionary/HKVariantsRevPhrases.txt | 139 ++ .../JPVariants.txt} | 0 .../STCharacters.txt} | 660 +++++- .../phrases.txt => dictionary/STPhrases.txt} | 297 ++- .../TSCharacters.txt} | 653 +++++- .../phrases.txt => dictionary/TSPhrases.txt} | 50 +- .../tw-it.txt => dictionary/TWPhrasesIT.txt} | 33 +- .../TWPhrasesName.txt} | 10 +- data/dictionary/TWPhrasesOther.txt | 14 + .../TWVariants.txt} | 17 +- .../TWVariantsRevPhrases.txt} | 50 +- data/scripts/common.py | 30 +- data/scripts/common.pyc | Bin 2002 -> 0 bytes data/scripts/find_target.py | 4 +- data/scripts/merge.py | 11 +- data/scripts/reverse.py | 4 +- data/scripts/sort.py | 15 +- 
data/scripts/sort_all.py | 16 + data/tw/from_tw_variants.txt | 22 - data/tw/merge.sh | 1 - data/tw/to_tw_phrases.txt | 410 ---- data/tw/tw-other.txt | 10 - debug.sh | 11 - deps/darts-clone/darts.h | 1898 +++++++++++++++++ deps/rapidjson-0.11/document.h | 821 +++++++ deps/rapidjson-0.11/filestream.h | 46 + deps/rapidjson-0.11/internal/pow10.h | 54 + deps/rapidjson-0.11/internal/stack.h | 82 + deps/rapidjson-0.11/internal/strfunc.h | 24 + deps/rapidjson-0.11/prettywriter.h | 156 ++ deps/rapidjson-0.11/rapidjson.h | 525 +++++ deps/rapidjson-0.11/reader.h | 683 ++++++ deps/rapidjson-0.11/stringbuffer.h | 49 + deps/rapidjson-0.11/writer.h | 241 +++ deps/tclap-1.2.1/tclap/Arg.h | 692 ++++++ deps/tclap-1.2.1/tclap/ArgException.h | 200 ++ deps/tclap-1.2.1/tclap/ArgTraits.h | 87 + deps/tclap-1.2.1/tclap/COPYING | 25 + deps/tclap-1.2.1/tclap/CmdLine.h | 633 ++++++ deps/tclap-1.2.1/tclap/CmdLineInterface.h | 150 ++ deps/tclap-1.2.1/tclap/CmdLineOutput.h | 74 + deps/tclap-1.2.1/tclap/Constraint.h | 68 + deps/tclap-1.2.1/tclap/DocBookOutput.h | 299 +++ deps/tclap-1.2.1/tclap/HelpVisitor.h | 76 + deps/tclap-1.2.1/tclap/IgnoreRestVisitor.h | 52 + deps/tclap-1.2.1/tclap/MultiArg.h | 433 ++++ deps/tclap-1.2.1/tclap/MultiSwitchArg.h | 216 ++ .../tclap/OptionalUnlabeledTracker.h | 62 + deps/tclap-1.2.1/tclap/StandardTraits.h | 208 ++ deps/tclap-1.2.1/tclap/StdOutput.h | 298 +++ deps/tclap-1.2.1/tclap/SwitchArg.h | 266 +++ deps/tclap-1.2.1/tclap/UnlabeledMultiArg.h | 301 +++ deps/tclap-1.2.1/tclap/UnlabeledValueArg.h | 340 +++ deps/tclap-1.2.1/tclap/ValueArg.h | 425 ++++ deps/tclap-1.2.1/tclap/ValuesConstraint.h | 148 ++ deps/tclap-1.2.1/tclap/VersionVisitor.h | 81 + deps/tclap-1.2.1/tclap/Visitor.h | 53 + deps/tclap-1.2.1/tclap/XorHandler.h | 166 ++ deps/tclap-1.2.1/tclap/ZshCompletionOutput.h | 323 +++ doc/CMakeLists.txt | 10 +- doc/opencc.1 | 27 - doc/opencc.doxy.in | 46 +- doc/opencc_dict.1 | 18 - gypi/configs.gypi | 25 - gypi/dicts.gypi | 60 - gypi/global.gypi | 11 - 
gypi/opencc_dict.gypi | 17 - node/binding.cc | 139 +- node/configs.gypi | 19 + node/demo.js | 7 +- node/dicts.gypi | 158 ++ node/global.gypi | 30 + node/node_binding.gypi | 26 + node/opencc.js | 38 +- node/opencc_dict.gypi | 21 + node/test.js | 34 +- opencc.gyp | 39 - package.json | 7 +- po/CMakeLists.txt | 33 - po/LINGUAS | 3 - po/POTFILES.in | 24 - po/update.sh | 25 - po/zh_CN.po | 252 --- po/zh_HK.po | 252 --- po/zh_TW.po | 252 --- release.sh | 11 - src/BinaryDict.cpp | 181 ++ src/BinaryDict.hpp | 61 + src/CMakeLists.txt | 186 +- src/CmdLineOutput.hpp | 47 + src/CommandLine.cpp | 179 ++ src/Common.hpp | 91 + src/Config.cpp | 240 +++ src/{config_reader.h => Config.hpp} | 39 +- src/Conversion.cpp | 51 + src/Conversion.hpp | 50 + src/ConversionChain.cpp | 34 + src/ConversionChain.hpp | 43 + src/Converter.cpp | 35 + src/Converter.hpp | 55 + src/DartsDict.cpp | 168 ++ src/DartsDict.hpp | 60 + src/Dict.cpp | 52 + src/Dict.hpp | 81 + src/DictConverter.cpp | 105 + src/DictEntry.cpp | 38 + src/DictEntry.hpp | 241 +++ src/DictGroup.cpp | 92 + src/DictGroup.hpp | 55 + src/Exception.hpp | 91 + src/Export.hpp | 40 + src/Lexicon.hpp | 67 + src/MaxMatchSegmentation.cpp | 49 + src/MaxMatchSegmentation.hpp | 47 + src/Optional.hpp | 95 + src/Segmentation.cpp | 17 + src/{dict_chain.h => Segmentation.hpp} | 26 +- src/Segments.hpp | 124 ++ src/SerializableDict.hpp | 69 + src/SimpleConverter.cpp | 146 ++ src/TextDict.cpp | 116 + src/TextDict.hpp | 60 + src/UTF8Util.cpp | 46 + src/UTF8Util.hpp | 201 ++ src/common.h | 101 - src/config_reader.c | 243 --- src/converter.c | 607 ------ src/converter.h | 50 - src/dict.c | 95 - src/dict.h | 38 - src/dict_chain.c | 51 - src/dict_group.c | 189 -- src/dict_group.h | 57 - src/dictionary/datrie.c | 315 --- src/dictionary/datrie.h | 47 - src/dictionary/text.c | 286 --- src/dictionary/text.h | 51 - src/encoding.c | 242 --- src/encoding.h | 54 - src/opencc.c | 245 --- src/opencc.h | 282 ++- src/opencc_types.h | 60 - src/symbols.cmake | 41 - 
src/tools/CMakeLists.txt | 74 - src/tools/opencc.c | 196 -- src/tools/opencc_dict.c | 408 ---- src/utils.c | 182 -- src/utils.h | 48 - src/wrapper/cplusplus/openccxx.h | 136 -- src/wrapper/python/opencc.py | 90 - test/CMakeLists.txt | 85 +- test/DictTestUtils.hpp | 150 ++ test/TestUtils.hpp | 80 + test/UnitTest.cpp | 233 ++ test/config_test/config_test.json | 22 + test/config_test/config_test_characters.txt | 2 + test/config_test/config_test_phrases.txt | 2 + test/testcases/hk2s.ans | 3 + test/testcases/hk2s.in | 3 + test/testcases/mix2zhs.ans | 2 - test/testcases/mix2zhs.in | 2 - test/testcases/mix2zht.ans | 2 - test/testcases/mix2zht.in | 2 - test/testcases/s2hk.ans | 3 + test/testcases/s2hk.in | 3 + test/testcases/{zhs2zht.ans => s2t.ans} | 3 + test/testcases/{zhs2zht.in => s2t.in} | 3 + test/testcases/s2tw.ans | 2 + test/testcases/s2tw.in | 2 + test/testcases/{zhs2zhtw_vp.ans => s2twp.ans} | 0 test/testcases/{zhs2zhtw_p.in => s2twp.in} | 0 test/testcases/{zht2zhs.ans => t2s.ans} | 2 +- test/testcases/{zht2zhs.in => t2s.in} | 2 +- test/testcases/tw2s.ans | 1 + test/testcases/tw2s.in | 1 + test/testcases/{zhs2zhtw_vp.in => tw2sp.ans} | 0 test/testcases/{zhtw2zhcn_s.in => tw2sp.in} | 0 test/testcases/zhs2zhtw_p.ans | 3 - test/testcases/zhtw2zhcn_s.ans | 3 - test/testcases/zhtw2zhcn_t.ans | 3 - test/testcases/zhtw2zhcn_t.in | 3 - 222 files changed, 17153 insertions(+), 7643 deletions(-) delete mode 100644 .cproject create mode 100644 .gitignore create mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .project create mode 100644 .travis.yml create mode 100644 Makefile delete mode 100644 data/cn/cn-it.txt delete mode 100644 data/cn/cn-name.txt delete mode 100644 data/cn/cn-other.txt delete mode 100755 data/cn/merge.sh delete mode 100644 data/cn/to_cn_phrases.txt create mode 100644 data/config/hk2s.json delete mode 100644 data/config/mix2zhs.ini delete mode 100644 data/config/mix2zht.ini create mode 100644 data/config/s2hk.json create mode 100644 
data/config/s2t.json create mode 100644 data/config/s2tw.json create mode 100644 data/config/s2twp.json create mode 100644 data/config/t2s.json create mode 100644 data/config/tw2s.json create mode 100644 data/config/tw2sp.json delete mode 100644 data/config/zhs2zht.ini delete mode 100644 data/config/zhs2zhtw_p.ini delete mode 100644 data/config/zhs2zhtw_v.ini delete mode 100644 data/config/zhs2zhtw_vp.ini delete mode 100644 data/config/zht2zhs.ini delete mode 100644 data/config/zht2zhtw_p.ini delete mode 100644 data/config/zht2zhtw_v.ini delete mode 100644 data/config/zht2zhtw_vp.ini delete mode 100644 data/config/zhtw2zhcn_s.ini delete mode 100644 data/config/zhtw2zhcn_t.ini delete mode 100644 data/config/zhtw2zhs.ini delete mode 100644 data/config/zhtw2zht.ini create mode 100644 data/dictionary/HKVariants.txt create mode 100644 data/dictionary/HKVariantsPhrases.txt create mode 100644 data/dictionary/HKVariantsRevPhrases.txt rename data/{jp/to_jp_variants.txt => dictionary/JPVariants.txt} (100%) rename data/{simp_to_trad/characters.txt => dictionary/STCharacters.txt} (81%) rename data/{simp_to_trad/phrases.txt => dictionary/STPhrases.txt} (99%) rename data/{trad_to_simp/characters.txt => dictionary/TSCharacters.txt} (82%) rename data/{trad_to_simp/phrases.txt => dictionary/TSPhrases.txt} (82%) rename data/{tw/tw-it.txt => dictionary/TWPhrasesIT.txt} (92%) rename data/{tw/tw-name.txt => dictionary/TWPhrasesName.txt} (94%) create mode 100644 data/dictionary/TWPhrasesOther.txt rename data/{tw/to_tw_variants.txt => dictionary/TWVariants.txt} (66%) rename data/{tw/from_tw_phrases.txt => dictionary/TWVariantsRevPhrases.txt} (98%) delete mode 100644 data/scripts/common.pyc create mode 100755 data/scripts/sort_all.py delete mode 100644 data/tw/from_tw_variants.txt delete mode 100755 data/tw/merge.sh delete mode 100644 data/tw/to_tw_phrases.txt delete mode 100644 data/tw/tw-other.txt delete mode 100755 debug.sh create mode 100644 deps/darts-clone/darts.h create mode 100755 
deps/rapidjson-0.11/document.h create mode 100755 deps/rapidjson-0.11/filestream.h create mode 100755 deps/rapidjson-0.11/internal/pow10.h create mode 100755 deps/rapidjson-0.11/internal/stack.h create mode 100755 deps/rapidjson-0.11/internal/strfunc.h create mode 100755 deps/rapidjson-0.11/prettywriter.h create mode 100755 deps/rapidjson-0.11/rapidjson.h create mode 100755 deps/rapidjson-0.11/reader.h create mode 100755 deps/rapidjson-0.11/stringbuffer.h create mode 100755 deps/rapidjson-0.11/writer.h create mode 100644 deps/tclap-1.2.1/tclap/Arg.h create mode 100644 deps/tclap-1.2.1/tclap/ArgException.h create mode 100644 deps/tclap-1.2.1/tclap/ArgTraits.h create mode 100644 deps/tclap-1.2.1/tclap/COPYING create mode 100644 deps/tclap-1.2.1/tclap/CmdLine.h create mode 100644 deps/tclap-1.2.1/tclap/CmdLineInterface.h create mode 100644 deps/tclap-1.2.1/tclap/CmdLineOutput.h create mode 100644 deps/tclap-1.2.1/tclap/Constraint.h create mode 100644 deps/tclap-1.2.1/tclap/DocBookOutput.h create mode 100644 deps/tclap-1.2.1/tclap/HelpVisitor.h create mode 100644 deps/tclap-1.2.1/tclap/IgnoreRestVisitor.h create mode 100644 deps/tclap-1.2.1/tclap/MultiArg.h create mode 100644 deps/tclap-1.2.1/tclap/MultiSwitchArg.h create mode 100644 deps/tclap-1.2.1/tclap/OptionalUnlabeledTracker.h create mode 100644 deps/tclap-1.2.1/tclap/StandardTraits.h create mode 100644 deps/tclap-1.2.1/tclap/StdOutput.h create mode 100644 deps/tclap-1.2.1/tclap/SwitchArg.h create mode 100644 deps/tclap-1.2.1/tclap/UnlabeledMultiArg.h create mode 100644 deps/tclap-1.2.1/tclap/UnlabeledValueArg.h create mode 100644 deps/tclap-1.2.1/tclap/ValueArg.h create mode 100644 deps/tclap-1.2.1/tclap/ValuesConstraint.h create mode 100644 deps/tclap-1.2.1/tclap/VersionVisitor.h create mode 100644 deps/tclap-1.2.1/tclap/Visitor.h create mode 100644 deps/tclap-1.2.1/tclap/XorHandler.h create mode 100644 deps/tclap-1.2.1/tclap/ZshCompletionOutput.h delete mode 100644 doc/opencc.1 delete mode 100644 
doc/opencc_dict.1 delete mode 100644 gypi/configs.gypi delete mode 100644 gypi/dicts.gypi delete mode 100644 gypi/global.gypi delete mode 100644 gypi/opencc_dict.gypi create mode 100644 node/configs.gypi create mode 100644 node/dicts.gypi create mode 100644 node/global.gypi create mode 100644 node/node_binding.gypi create mode 100644 node/opencc_dict.gypi delete mode 100644 opencc.gyp delete mode 100644 po/CMakeLists.txt delete mode 100644 po/LINGUAS delete mode 100644 po/POTFILES.in delete mode 100755 po/update.sh delete mode 100644 po/zh_CN.po delete mode 100644 po/zh_HK.po delete mode 100644 po/zh_TW.po delete mode 100755 release.sh create mode 100644 src/BinaryDict.cpp create mode 100644 src/BinaryDict.hpp create mode 100644 src/CmdLineOutput.hpp create mode 100644 src/CommandLine.cpp create mode 100644 src/Common.hpp create mode 100644 src/Config.cpp rename src/{config_reader.h => Config.hpp} (51%) create mode 100644 src/Conversion.cpp create mode 100644 src/Conversion.hpp create mode 100644 src/ConversionChain.cpp create mode 100644 src/ConversionChain.hpp create mode 100644 src/Converter.cpp create mode 100644 src/Converter.hpp create mode 100644 src/DartsDict.cpp create mode 100644 src/DartsDict.hpp create mode 100644 src/Dict.cpp create mode 100644 src/Dict.hpp create mode 100644 src/DictConverter.cpp create mode 100644 src/DictEntry.cpp create mode 100644 src/DictEntry.hpp create mode 100644 src/DictGroup.cpp create mode 100644 src/DictGroup.hpp create mode 100644 src/Exception.hpp create mode 100644 src/Export.hpp create mode 100644 src/Lexicon.hpp create mode 100644 src/MaxMatchSegmentation.cpp create mode 100644 src/MaxMatchSegmentation.hpp create mode 100644 src/Optional.hpp create mode 100644 src/Segmentation.cpp rename src/{dict_chain.h => Segmentation.hpp} (61%) create mode 100644 src/Segments.hpp create mode 100644 src/SerializableDict.hpp create mode 100644 src/SimpleConverter.cpp create mode 100644 src/TextDict.cpp create mode 100644 
src/TextDict.hpp create mode 100644 src/UTF8Util.cpp create mode 100644 src/UTF8Util.hpp delete mode 100644 src/common.h delete mode 100644 src/config_reader.c delete mode 100644 src/converter.c delete mode 100644 src/converter.h delete mode 100644 src/dict.c delete mode 100644 src/dict.h delete mode 100644 src/dict_chain.c delete mode 100644 src/dict_group.c delete mode 100644 src/dict_group.h delete mode 100644 src/dictionary/datrie.c delete mode 100644 src/dictionary/datrie.h delete mode 100644 src/dictionary/text.c delete mode 100644 src/dictionary/text.h delete mode 100644 src/encoding.c delete mode 100644 src/encoding.h delete mode 100644 src/opencc.c delete mode 100644 src/opencc_types.h delete mode 100644 src/symbols.cmake delete mode 100644 src/tools/CMakeLists.txt delete mode 100644 src/tools/opencc.c delete mode 100644 src/tools/opencc_dict.c delete mode 100644 src/utils.c delete mode 100644 src/utils.h delete mode 100644 src/wrapper/cplusplus/openccxx.h delete mode 100755 src/wrapper/python/opencc.py create mode 100644 test/DictTestUtils.hpp create mode 100644 test/TestUtils.hpp create mode 100644 test/UnitTest.cpp create mode 100644 test/config_test/config_test.json create mode 100644 test/config_test/config_test_characters.txt create mode 100644 test/config_test/config_test_phrases.txt create mode 100644 test/testcases/hk2s.ans create mode 100644 test/testcases/hk2s.in delete mode 100644 test/testcases/mix2zhs.ans delete mode 100644 test/testcases/mix2zhs.in delete mode 100644 test/testcases/mix2zht.ans delete mode 100644 test/testcases/mix2zht.in create mode 100644 test/testcases/s2hk.ans create mode 100644 test/testcases/s2hk.in rename test/testcases/{zhs2zht.ans => s2t.ans} (87%) rename test/testcases/{zhs2zht.in => s2t.in} (87%) create mode 100644 test/testcases/s2tw.ans create mode 100644 test/testcases/s2tw.in rename test/testcases/{zhs2zhtw_vp.ans => s2twp.ans} (100%) rename test/testcases/{zhs2zhtw_p.in => s2twp.in} (100%) rename 
test/testcases/{zht2zhs.ans => t2s.ans} (99%) rename test/testcases/{zht2zhs.in => t2s.in} (99%) create mode 100644 test/testcases/tw2s.ans create mode 100644 test/testcases/tw2s.in rename test/testcases/{zhs2zhtw_vp.in => tw2sp.ans} (100%) rename test/testcases/{zhtw2zhcn_s.in => tw2sp.in} (100%) delete mode 100644 test/testcases/zhs2zhtw_p.ans delete mode 100644 test/testcases/zhtw2zhcn_s.ans delete mode 100644 test/testcases/zhtw2zhcn_t.ans delete mode 100644 test/testcases/zhtw2zhcn_t.in diff --git a/.cproject b/.cproject deleted file mode 100644 index 5a2cbe9..0000000 --- a/.cproject +++ /dev/null @@ -1,68 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..652dc13 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.pyc +*.tgz +.project +.cproject +/build +/other +/doc/html +/opencc.xcodeproj +/test/dict.ocd +/test/dict.txt +/test/dict.bin +/xcode diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 0000000..50cffbf --- /dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,63 @@ + + + + + + + diff --git a/.npmignore b/.npmignore index 01aa583..9ca7342 100644 --- a/.npmignore +++ b/.npmignore @@ -2,21 +2,17 @@ .gitignore CMakeLists.txt *.cmake -merge.sh +*.pyc -/po /doc -/data/scripts /data/scheme -/src/wrapper /build /debug /release /other -/debug.sh -/release.sh /opencc.pc.in -/opencc.gyp -/INSTALL /doc/html /opencc.xcodeproj +/test/dict.ocd +/test/dict.txt +/test/dict.bin diff --git a/.project b/.project deleted file mode 100644 index 029e3f8..0000000 --- a/.project +++ /dev/null @@ -1,26 +0,0 @@ - - - opencc - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - 
org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..7e4f795 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +language: cpp +compiler: + - gcc +before_install: + - sudo add-apt-repository -y ppa:chris-lea/node.js + - sudo apt-get update + - sudo apt-get install nodejs -y + - sudo apt-get install doxygen -y + - sudo npm install -g mocha + - sudo npm install -g node-gyp +script: make test && make package && make node-test diff --git a/CMakeLists.txt b/CMakeLists.txt index b8e65c5..a63e431 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # # Open Chinese Convert # -# Copyright 2010-2013 BYVoid +# Copyright 2010-2014 BYVoid # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,109 +19,119 @@ ######## Project settings cmake_minimum_required(VERSION 2.8) set (PACKAGE_NAME opencc) -project (${PACKAGE_NAME} C) +project (${PACKAGE_NAME} CXX) include (CTest) enable_testing() +option(BUILD_SHARED_LIBS "Build opencc as shared library" ON) + ######## Package information -set (PACKAGE_URL http://code.google.com/p/opencc) -set (PACKAGE_BUGREPORT http://code.google.com/p/opencc/issues/entry) -set (OPENCC_VERSION_MAJOR 0) -set (OPENCC_VERSION_MINOR 4) -set (OPENCC_VERSION_REVISION 3) +set (PACKAGE_URL https://github.com/BYVoid/Opencc) +set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues) +set (OPENCC_VERSION_MAJOR 1) +set (OPENCC_VERSION_MINOR 0) +set (OPENCC_VERSION_REVISION 2) if (CMAKE_BUILD_TYPE MATCHES Debug) - set (version_suffix .Debug) + set (version_suffix .Debug) endif (CMAKE_BUILD_TYPE MATCHES Debug) set ( - OPENCC_VERSION - ${OPENCC_VERSION_MAJOR}.${OPENCC_VERSION_MINOR}.${OPENCC_VERSION_REVISION}${version_suffix} + OPENCC_VERSION + 
${OPENCC_VERSION_MAJOR}.${OPENCC_VERSION_MINOR}.${OPENCC_VERSION_REVISION}${version_suffix} ) set(CPACK_SOURCE_PACKAGE_FILE_NAME "${PACKAGE_NAME}-${OPENCC_VERSION_MAJOR}.${OPENCC_VERSION_MINOR}.${OPENCC_VERSION_REVISION}" ) set(CPACK_SOURCE_IGNORE_FILES - "/build/;/release/;/debug/;/other/;/opencc.xcodeproj/;/.git/;.gitignore;~$;${CPACK_SOURCE_IGNORE_FILES}" + "/build/;/test/dict.ocd;/test/dict.txt;/test/dict.bin;/other/;/opencc.xcodeproj/;/.git/;.gitignore;~$;.pyc;${CPACK_SOURCE_IGNORE_FILES}" ) include(CPack) -######## Validation +######## Options option(BUILD_DOCUMENTATION "Use Doxygen to create the HTML based API documentation" OFF) -if (ENABLE_GETTEXT) - find_package(Gettext REQUIRED) -endif (ENABLE_GETTEXT) - -include (TestBigEndian) -TEST_BIG_ENDIAN(BIGENDIAN) -if (BIGENDIAN) - set (BYTEORDER BIG_ENDIAN) -else (BIGENDIAN) - set (BYTEORDER LITTLE_ENDIAN) -endif (BIGENDIAN) - ######## Windows if (WIN32) - set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) - set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) + set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) + set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX}) endif (WIN32) +######## Mac OS X + +set(CMAKE_MACOSX_RPATH 1) + ######## Directory set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX}) -set (DIR_LIBRARY ${DIR_PREFIX}/${CMAKE_SHARED_LIBRARY_PREFIX}) -set (DIR_LIBRARY_STATIC ${DIR_PREFIX}/${CMAKE_STATIC_LIBRARY_PREFIX}) -set (DIR_INCLUDE ${DIR_PREFIX}/include) -set (DIR_SHARE ${DIR_PREFIX}/share) -set (DIR_BIN ${DIR_PREFIX}/bin) -set (DIR_ETC ${DIR_PREFIX}/etc) - -if (DEFINED CMAKE_INSTALL_LIBDIR) - set (DIR_LIBRARY ${CMAKE_INSTALL_LIBDIR}) - set (DIR_LIBRARY_STATIC ${CMAKE_INSTALL_LIBDIR}) -endif (DEFINED CMAKE_INSTALL_LIBDIR) +set (DIR_INCLUDE ${DIR_PREFIX}/include/) +set (DIR_SHARE ${DIR_PREFIX}/share/) +set (DIR_ETC ${DIR_PREFIX}/etc/) +set (DIR_LIBRARY ${DIR_PREFIX}/lib/) if (DEFINED SHARE_INSTALL_PREFIX) - set (DIR_SHARE ${SHARE_INSTALL_PREFIX}) + set (DIR_SHARE 
${SHARE_INSTALL_PREFIX}) endif (DEFINED SHARE_INSTALL_PREFIX) if (DEFINED INCLUDE_INSTALL_DIR) - set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR}) + set (DIR_INCLUDE ${INCLUDE_INSTALL_DIR}) endif (DEFINED INCLUDE_INSTALL_DIR) if (DEFINED SYSCONF_INSTALL_DIR) - set (DIR_ETC ${SYSCONF_INSTALL_DIR}) + set (DIR_ETC ${SYSCONF_INSTALL_DIR}) endif (DEFINED SYSCONF_INSTALL_DIR) -set (DIR_SHARE_OPENCC ${DIR_SHARE}/opencc) -set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale) +set (DIR_SHARE_OPENCC ${DIR_SHARE}opencc/) +set (DIR_SHARE_LOCALE ${DIR_SHARE}locale/) ######## Configuration configure_file( - opencc.pc.in - opencc.pc - @ONLY + opencc.pc.in + opencc.pc + @ONLY ) install( - FILES - ${CMAKE_BINARY_DIR}/opencc.pc - DESTINATION - ${DIR_LIBRARY}/pkgconfig + FILES + ${CMAKE_BINARY_DIR}/opencc.pc + DESTINATION + ${DIR_LIBRARY}/pkgconfig ) +######## Compiler flags + +add_definitions( + -DPKGDATADIR="${DIR_SHARE_OPENCC}" + -DLOCALEDIR="${DIR_SHARE_LOCALE}" + -DVERSION="${OPENCC_VERSION}" + -DPACKAGE_NAME="${PACKAGE_NAME}" +) + +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + add_definitions( + -std=c++11 + -Wall + ) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread") +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + add_definitions( + -std=c++0x + -Wall + ) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread") +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + add_definitions( + /Wall + /D "_CRT_SECURE_NO_WARNINGS" + ) +endif() + ######## Subdirectories add_subdirectory(src) add_subdirectory(doc) add_subdirectory(data) - -if (GETTEXT_FOUND AND ENABLE_GETTEXT) - add_subdirectory(po) -endif (GETTEXT_FOUND AND ENABLE_GETTEXT) - add_subdirectory(test) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cd134a7 --- /dev/null +++ b/Makefile @@ -0,0 +1,68 @@ +# +# Open Chinese Convert +# +# Copyright 2010-2014 BYVoid +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +.PHONY: build clean node test xcode-build + +build: + mkdir -p build/rel + (cd build/rel; cmake \ + -DBUILD_DOCUMENTATION:BOOL=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/usr \ + ../..) + make -C build/rel + +package: build + make -C build/rel package_source + +test: + mkdir -p build/dbg/root + (cd build/dbg; cmake \ + -DBUILD_DOCUMENTATION:BOOL=OFF \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_INSTALL_PREFIX=`pwd`/root \ + ../..) + make -C build/dbg + make -C build/dbg test + make -C build/dbg install + +node: + node-gyp configure + node-gyp build + +node-test: node + npm test + +xcode-build: + mkdir -p xcode + (cd xcode; cmake \ + -G "Xcode" \ + -DBUILD_DOCUMENTATION:BOOL=OFF \ + ..; \ + xcodebuild build) + +test-all: test node-test + +clean: + rm -rf build xcode + +install: build + make -C build/rel install + +dist: release + make -C build/rel package_source diff --git a/NEWS.md b/NEWS.md index 04dc9f4..db3e70a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,24 +1,46 @@ # Change History of OpenCC -## Ver 0.4.3 +## Version 1.0.2 + +2014年11月8日 + +* 修正C語言接口的編譯錯誤問題 +* 修正默認簡繁轉換文件名錯誤問題 +* `DictEntry`增加`Values()`方法 + +## Version 1.0.1 + +2014年10月18日 + +* 使用C++11完全重寫OpenCC +* 修復大量轉換錯誤 +* 增加香港繁體轉換 + +## Version 0.4.3 + +2013年5月17日 * 增加接口`opencc_convert_utf8_free` * 修正Node.js插件內存泄漏問題 * 修正Windows下獲取當前目錄的問題 -## Ver 0.4.2 +## Version 0.4.2 + +2013年4月14日 * 修正「阪」、「薰」繁簡轉換 * 增加四對缺失的簡繁轉換 * 增加API文檔,由Doxygen生成 * 重構大量代碼 -## Ver 0.4.1 +## Version 0.4.1 + +2013年3月21日 * 修正Node.js 0.10兼容性問題。 * 從Unihan數據庫增加若干缺失的簡繁轉換單字。 -## Ver 0.4.0 +## Version 0.4.0 
2013年3月2日 @@ -32,7 +54,7 @@ * 增加了gyp編譯系統。 * 增加了Node.js接口。 -## Ver 0.3.0 +## Version 0.3.0 2011年12月2日 @@ -47,7 +69,7 @@ * 增加「毀」「譭」「燬」對立。 * 增加「背」「揹」對立。 -## Ver 0.2.0 +## Version 0.2.0 2010年12月23日 @@ -58,7 +80,7 @@ * 使用CMake代替Autotools構建編譯框架。 * 修正包括「拿不準」在內諸多簡繁轉換問題。 -## Ver 0.1.2 +## Version 0.1.2 2010年9月16日 @@ -69,7 +91,7 @@ * 修正輸入爲空時轉換的Bug。 * 改進opencc命令行工具參數提示和幫助。 -## Ver 0.1.1 +## Version 0.1.1 2010年8月10日 @@ -82,7 +104,7 @@ * 增加編譯時的測試。 * 分離辭典爲字典和詞典。 -## Ver 0.1.0 +## Version 0.1.0 2010年7月28日 @@ -94,7 +116,7 @@ * 增加「岳嶽」一簡對多繁轉換。 * 隱藏不必要的類型,更新接口註釋。 -## Ver 0.0.5 +## Version 0.0.5 2010年7月21日 @@ -103,7 +125,7 @@ * 修正一個文件名緩衝區分配的問題。 * 增加「囉」「溼」「廕」「彷」「徵」繁簡轉換。 -## Ver 0.0.4 +## Version 0.0.4 2010年7月16日 @@ -113,7 +135,7 @@ * 修正辭典加載兼容性問題,當無法mmap時直接申請內存。 * 修正C++接口在64位平臺下編譯的問題。 -## Ver 0.0.3 +## Version 0.0.3 2010年6月22日 @@ -122,7 +144,7 @@ * 增加辭典配置文件支持。 * 修正一些兼容性Bug。 -## Ver 0.0.2 +## Version 0.0.2 2010年6月19日 @@ -131,7 +153,7 @@ * 增加平面文件詞庫到`Datrie`詞庫的轉換工具`opencc_dict`。 * 提供UTF8文本直接轉換的接口。 -## Ver 0.0.1 +## Version 0.0.1 2010年6月11日 diff --git a/README.md b/README.md index 5495f83..65777fc 100644 --- a/README.md +++ b/README.md @@ -1,138 +1,133 @@ -# Open Chinese Convert +# Open Chinese Convert 開放中文轉換 -## Introduction +## Introduction 介紹 Open Chinese Convert (OpenCC, 開放中文轉換) is an opensource project for conversion between Traditional Chinese and Simplified Chinese, supporting character-level conversion, phrase-level conversion, variant conversion and regional idioms among Mainland China, Taiwan and Hong kong. 
-中文簡繁轉換開源項目,支持詞彙級别的轉換、異體字轉換和地區習慣用詞轉換(中國大陸、臺灣、香港)。 +中文簡繁轉換開源項目,支持詞彙級別的轉換、異體字轉換和地區習慣用詞轉換(中國大陸、臺灣、香港)。 -### OpenCC特點 +### Features 特點 * 嚴格區分「一簡對多繁」和「一簡對多異」。 * 完全兼容異體字,可以實現動態替換。 * 嚴格審校一簡對多繁詞條,原則爲「能分則不合」。 * 支持中國大陸、臺灣、香港異體字和地區習慣用詞轉換,如「裏」「裡」、「鼠標」「滑鼠」。 -* 使用歧義分割+最少分詞算法,儘可能從技術上優化轉換效果。 * 詞庫和函數庫完全分離,可以自由修改、導入、擴展。 * 支持C、C++、Python、PHP、Java、Ruby、Node.js。 * 兼容Windows、Linux、Mac平臺。 -* 已經用於ibus-pinyin、fcitx的繁體模式輸入。 -## Links +### Links 相關鏈接 -### Project home page -http://code.google.com/p/opencc/ +* Introduction 詳細介紹 https://github.com/BYVoid/OpenCC/wiki/%E7%B7%A3%E7%94%B1 +* OpenCC Online (在線轉換) http://opencc.byvoid.com/ +* 現代漢語常用簡繁一對多字義辨析表 http://ytenx.org/byohlyuk/KienxPyan -### Introduction (詳細介紹) -https://code.google.com/p/opencc/wiki/Introduction +## Installation 安裝 -### Development Documentation -http://byvoid.github.io/OpenCC/ +* [Debian](http://packages.qa.debian.org/o/opencc.html) +* [Ubuntu](https://launchpad.net/ubuntu/+source/opencc) +* [Fedora](https://admin.fedoraproject.org/pkgdb/package/opencc/) +* [Arch Linux](https://www.archlinux.org/packages/community/x86_64/opencc/) +* [Mac OS](https://github.com/mxcl/homebrew/blob/master/Library/Formula/opencc.rb) +* [Node.js](https://npmjs.org/package/opencc) -### Source Code on Github -https://github.com/byvoid/opencc +## Download 下載 -### OpenCC Online (在線轉換) -http://opencc.byvoid.com/ +https://bintray.com/byvoid/opencc/OpenCC -### 現代漢語常用簡繁一對多字義辨析表 -http://ytenx.org/byohlyuk/KienxPyan +## Usage 使用 -### Projects using Opencc +### Command Line 命令行 -* [ibus-pinyin](http://code.google.com/p/ibus/) -* [fcitx](http://code.google.com/p/fcitx/) -* [rimeime](http://code.google.com/p/rimeime/) -* [libgooglepinyin](http://code.google.com/p/libgooglepinyin/) -* [ibus-libpinyin](https://github.com/libpinyin/ibus-libpinyin) -* [BYVBlog](https://github.com/byvoid/byvblog) -* [豆瓣同城微信](http://weixinqiao.com/douban-event/) - -## Installation - -### 
[Debian](http://packages.qa.debian.org/o/opencc.html)/[Ubuntu](https://launchpad.net/ubuntu/+source/opencc) - - apt-get install opencc - -### [Fedora](https://admin.fedoraproject.org/pkgdb/acls/name/opencc) - - yum install opencc - -### [Arch](https://www.archlinux.org/packages/community/x86_64/opencc/) - - pacman -S opencc +`opencc --help` -### [Mac](https://github.com/mxcl/homebrew/blob/master/Library/Formula/opencc.rb) +### Configurations 配置文件 - brew install opencc +#### 預設配置文件 -### [Node.js](https://npmjs.org/package/opencc) +* `s2t.json` Simplified Chinese to Traditional Chinese 簡體到繁體 +* `t2s.json` Traditional Chinese to Simplified Chinese 繁體到簡體 +* `s2tw.json` Simplified Chinese to Traditional Chinese (Taiwan Standard) 簡體到臺灣正體 +* `tw2s.json` Traditional Chinese (Taiwan Standard) to Simplified Chinese 臺灣正體到簡體 +* `s2hk.json` Simplified Chinese to Traditional Chinese (Hong Kong Standard) 簡體到香港繁體(香港小學學習字詞表標準) +* `hk2s.json` Traditional Chinese (Hong Kong Standard) to Simplified Chinese 香港繁體(香港小學學習字詞表標準)到簡體 +* `s2twp.json` Simplified Chinese to Traditional Chinese (Taiwan Standard) with Taiwanese idiom 簡體到繁體(臺灣正體標準)並轉換爲臺灣常用詞彙 +* `tw2sp.json` Traditional Chinese (Taiwan Standard) to Simplified Chinese with Mainland Chinese idiom 繁體(臺灣正體標準)到簡體並轉換爲中國大陸常用詞彙 - npm install opencc +## Development Documentation 開發文檔 -## Usage +* http://byvoid.github.io/OpenCC/ - $ opencc --help - - Open Chinese Convert (OpenCC) Command Line Tool +## Build 編譯 - Author: BYVoid - Bug Report: http://github.com/BYVoid/OpenCC/issues - - Usage: - opencc [Options] - - Options: - -i [file], --input=[file] Read original text from [file]. - -o [file], --output=[file] Write converted text to [file]. - -c [file], --config=[file] Load configuration of conversion from [file]. - -v, --version Print version and build information. - -h, --help Print this help. - - With no input file, reads standard input and writes converted stream to standard output. 
- Default configuration(zhs2zht.ini) will be loaded if not set. - -## Build +[![Build Status](https://travis-ci.org/BYVoid/OpenCC.png?branch=master)](https://travis-ci.org/BYVoid/OpenCC) ### Build with CMake -Make a directory and check in: +Linux/OSX (gcc 4.6 or clang 3.2 is required): - mkdir build - cd build +``` +make +sudo make install +``` -Build sources: +Windows MSYS: - cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=Release -D ENABLE_GETTEXT:BOOL=ON .. - make +``` +cmake .. -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release +make +``` -On windows, run these commands instead: +Windows Visual Studio (2013 or higher required): - cmake .. -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release -DENABLE_GETTEXT:BOOL=OFF - make +``` +cmake .. -G "Visual Studio 12" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release +make +``` -Install: +### Projects using Opencc 使用OpenCC的項目 - sudo make install - - -### Build with gyp +* [ibus-pinyin](http://code.google.com/p/ibus/) +* [fcitx](http://code.google.com/p/fcitx/) +* [rimeime](http://code.google.com/p/rimeime/) +* [libgooglepinyin](http://code.google.com/p/libgooglepinyin/) +* [ibus-libpinyin](https://github.com/libpinyin/ibus-libpinyin) +* [BYVBlog](https://github.com/byvoid/byvblog) +* [豆瓣同城微信](http://weixinqiao.com/douban-event/) - mkdir build - gyp --depth . 
-D library=shared_library -f make --generator-output=build opencc.gyp - make -C build +## License 許可協議 -## Screenshot +Apache License 2.0 -![OpenCC Mac](http://opencc.googlecode.com/files/screenshot-gui-mac.png) +## Third Party Library 第三方庫 -![OpenCC Windows](http://opencc.googlecode.com/files/screenshot-gui.png) +* [darts-clone](https://code.google.com/p/darts-clone/) BSD License +* [tclap](http://tclap.sourceforge.net/) MIT License +* [rapidjson](https://github.com/miloyip/rapidjson) MIT License -![OpenCC Ubuntu](http://opencc.googlecode.com/files/screenshot-gui-ubuntu.png) +All these libraries are statically linked. -## Contributors +## Contributors 貢獻者 * [BYVoid](http://www.byvoid.com/) -* 佛振 -* Peng Huang -* LI Daobing +* [佛振](https://github.com/lotem) +* [Peng Huang](https://github.com/phuang) +* [LI Daobing](https://github.com/lidaobing) +* [Kefu Chai](https://github.com/tchaikov) +* [Kan-Ru Chen](http://kanru.info/) +* [Ma Xiaojun](https://twitter.com/damage3025) +* [Jiang Jiang](http://jjgod.org/) +* [Ruey-Cheng Chen](https://github.com/rueycheng) +* [Paul Meng](http://home.mno2.org/) +* [Lawrence Lau](https://github.com/ktslwy) +* [瑾昀](https://github.com/kunki) +* [Marguerite Su](https://www.marguerite.su/) +* [Brian White](http://mscdex.net) +* [Qijiang Fan](https://fqj.me/) +* [LEOYoon-Tsaw](https://github.com/LEOYoon-Tsaw) +* [Steven Yao](https://github.com/stevenyao) +* [Pellaeon Lin](https://github.com/pellaeon) +* [stony](https://github.com/stony-shixz) +* [steelywing](https://github.com/steelywing) +* [吕旭东](https://github.com/lvxudong) +* [Weng Xuetian](https://github.com/wengxt) diff --git a/binding.gyp b/binding.gyp index 28bd7e3..a86dcb8 100644 --- a/binding.gyp +++ b/binding.gyp @@ -1,27 +1,9 @@ { "includes": [ - "gypi/global.gypi", - "gypi/configs.gypi", - "gypi/dicts.gypi", - ], - "targets": [{ - "target_name": "binding", - "sources": [ - "node/binding.cc", - "src/config_reader.c", - "src/converter.c", - "src/dict_group.c", - 
"src/dict_chain.c", - "src/encoding.c", - "src/utils.c", - "src/opencc.c", - "src/dict.c", - "src/dictionary/datrie.c", - "src/dictionary/text.c" - ], - "dependencies": [ - "configs", - "dicts", - ] - }] + "node/global.gypi", + "node/configs.gypi", + "node/opencc_dict.gypi", + "node/dicts.gypi", + "node/node_binding.gypi", + ] } diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt index 31c4e56..87aa98b 100644 --- a/data/CMakeLists.txt +++ b/data/CMakeLists.txt @@ -1,101 +1,152 @@ +set(OPENCC_DICT_BIN opencc_dict) +set(DICT_MERGE_BIN python ${CMAKE_CURRENT_SOURCE_DIR}/scripts/merge.py) +set(DICT_REVERSE_BIN python ${CMAKE_CURRENT_SOURCE_DIR}/scripts/reverse.py) +set(DICT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dictionary) +set(DICT_GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}) + set( - OPENCC_DICT_BIN - ${CMAKE_BINARY_DIR}/src/tools/opencc_dict + DICTS_RAW + STCharacters + STPhrases + TSCharacters + TSPhrases + TWVariants + TWVariantsRevPhrases + JPVariants + HKVariants + HKVariantsPhrases + HKVariantsRevPhrases ) -add_custom_target( - ocds - ALL - DEPENDS - simp_to_trad_characters.ocd - simp_to_trad_phrases.ocd - trad_to_simp_characters.ocd - trad_to_simp_phrases.ocd +set( + DICTS_GENERATED + TWPhrases + TWPhrasesRev + TWVariantsRev + HKVariantsRev ) -foreach(DICT_PREFIX simp_to_trad trad_to_simp) - - foreach(DICT_SURFIX characters phrases) +set(DICTS ${DICTS_RAW} ${DICTS_GENERATED}) - set (DICT ${DICT_PREFIX}_${DICT_SURFIX}) +foreach(DICT ${DICTS}) + set(DICT_TARGETS ${DICT_TARGETS} ${DICT}.ocd) +endforeach(DICT) - add_custom_command( - OUTPUT - ${DICT}.ocd - COMMENT - "Building ${DICT}.ocd" - COMMAND - ${OPENCC_DICT_BIN} -i ${CMAKE_SOURCE_DIR}/data/${DICT_PREFIX}/${DICT_SURFIX}.txt -o ${DICT}.ocd - DEPENDS opencc_dict - ) - - install( - FILES - ${CMAKE_BINARY_DIR}/data/${DICT}.ocd - DESTINATION - ${DIR_SHARE_OPENCC} - ) +add_custom_target( + Dictionaries + ALL + DEPENDS + ${DICT_TARGETS} +) - set_directory_properties( - PROPERTIES - ADDITIONAL_MAKE_CLEAN_FILES - 
"${CMAKE_BINARY_DIR}/data/${DICT}.ocd" - ) +foreach(DICT ${DICTS_RAW}) + set(DICT_${DICT}_INPUT ${DICT_DIR}/${DICT}.txt) +endforeach(DICT) - endforeach(DICT_SURFIX) -endforeach(DICT_PREFIX) +foreach(DICT ${DICTS_GENERATED}) + set(DICT_${DICT}_INPUT ${DICT_GENERATED_DIR}/${DICT}.txt) +endforeach(DICT) -set(CONFIG_FILES - config/zhs2zht.ini - config/zht2zhs.ini - config/mix2zht.ini - config/mix2zhs.ini - config/zhs2zhtw_p.ini - config/zhs2zhtw_v.ini - config/zhs2zhtw_vp.ini - config/zht2zhtw_p.ini - config/zht2zhtw_v.ini - config/zht2zhtw_vp.ini - config/zhtw2zht.ini - config/zhtw2zhs.ini - config/zhtw2zhcn_s.ini - config/zhtw2zhcn_t.ini +set( + DICT_TWPhrases_GENERATING_INPUT + ${DICT_DIR}/TWPhrasesIT.txt + ${DICT_DIR}/TWPhrasesName.txt + ${DICT_DIR}/TWPhrasesOther.txt +) +set( + DICT_TWPhrases_GENERATING_COMMAND + ${DICT_MERGE_BIN} ${DICT_TWPhrases_GENERATING_INPUT} TWPhrases.txt ) -install( - FILES - ${CONFIG_FILES} - DESTINATION - ${DIR_SHARE_OPENCC} +set( + DICT_TWVariantsRev_GENERATING_INPUT + ${DICT_DIR}/TWVariants.txt +) +set( + DICT_TWVariantsRev_GENERATING_COMMAND + ${DICT_REVERSE_BIN} ${DICT_TWVariantsRev_GENERATING_INPUT} TWVariantsRev.txt ) -set(TAIWAN_DICT_FILES - tw/to_tw_variants.txt - tw/to_tw_phrases.txt - tw/from_tw_variants.txt - tw/from_tw_phrases.txt +set( + DICT_TWPhrasesRev_GENERATING_INPUT + ${DICT_GENERATED_DIR}/TWPhrases.txt +) +set( + DICT_TWPhrasesRev_GENERATING_COMMAND + ${DICT_REVERSE_BIN} ${DICT_TWPhrasesRev_GENERATING_INPUT} TWPhrasesRev.txt ) -install( - FILES - ${TAIWAN_DICT_FILES} - DESTINATION - ${DIR_SHARE_OPENCC} +set( + DICT_HKVariantsRev_GENERATING_INPUT + ${DICT_DIR}/HKVariants.txt +) +set( + DICT_HKVariantsRev_GENERATING_COMMAND + ${DICT_REVERSE_BIN} ${DICT_HKVariantsRev_GENERATING_INPUT} HKVariantsRev.txt ) -set(CHINA_DICT_FILES - cn/to_cn_phrases.txt +foreach(DICT ${DICTS_GENERATED}) + add_custom_command( + OUTPUT + ${DICT}.txt + COMMENT + "Generating ${DICT}.txt" + COMMAND + ${DICT_${DICT}_GENERATING_COMMAND} + DEPENDS + 
${DICT_${DICT}_GENERATING_INPUT} + ) + set_directory_properties( + PROPERTIES + ADDITIONAL_MAKE_CLEAN_FILES + "${DICT_GENERATED_DIR}/${DICT}.txt" + ) +endforeach(DICT) + +foreach(DICT ${DICTS}) + add_custom_command( + OUTPUT + ${DICT}.ocd + COMMENT + "Building ${DICT}.ocd" + COMMAND + ${OPENCC_DICT_BIN} + --input ${DICT_${DICT}_INPUT} + --output ${DICT}.ocd + --from text + --to ocd + DEPENDS + ${OPENCC_DICT_BIN} + ${DICT_${DICT}_INPUT} + ) + + install( + FILES + ${DICT_GENERATED_DIR}/${DICT}.ocd + DESTINATION + ${DIR_SHARE_OPENCC} + ) + + set_directory_properties( + PROPERTIES + ADDITIONAL_MAKE_CLEAN_FILES + "${DICT_GENERATED_DIR}/${DICT}.ocd" + ) +endforeach(DICT) + +set(CONFIG_FILES + config/s2t.json + config/t2s.json + config/s2tw.json + config/s2twp.json + config/tw2s.json + config/s2hk.json + config/hk2s.json ) install( - FILES - ${CHINA_DICT_FILES} - DESTINATION - ${DIR_SHARE_OPENCC} + FILES + ${CONFIG_FILES} + DESTINATION + ${DIR_SHARE_OPENCC} ) - -if (BUILD_TESTING) - foreach (DICT ${TAIWAN_DICT_FILES} ${CHINA_DICT_FILES}) - configure_file(${DICT} ${PROJECT_BINARY_DIR}/data COPYONLY) - endforeach (DICT) -endif (BUILD_TESTING) diff --git a/data/cn/cn-it.txt b/data/cn/cn-it.txt deleted file mode 100644 index 3297b23..0000000 --- a/data/cn/cn-it.txt +++ /dev/null @@ -1,338 +0,0 @@ -PN接面 PN結 -SQL隱碼攻擊 SQL注入 SQL注入攻擊 -三極體 三極管 -下拉選單 下拉列表 -中介軟體 中間件 -串列埠 串口 串行端口 -主機板 主板 -主開機記錄 主引導記錄 -乙太網 以太網 -二極體 二極管 -互動 交互 -互動式 交互式 -人工智慧 人工智能 -介面 接口 界面 -介面卡 適配器 -代碼 代碼 -伺服器 服務器 -佇列 隊列 -位元 比特 -位元率 比特率 -位元組 字節 -位元速率 碼率 -位址 地址 -位址列 地址欄 -低級 低級 -低階 低級 -作業系統 操作系統 -使用者 用戶 -來電轉駁 呼叫轉移 -例項 實例 -信號 信號 -偵錯 調試 -偵錯程式 調試器 -傷心小棧 紅心大戰 -價效比 性價比 -優先順序 優先級 -儲存 保存 -元件 組件 -光碟 光盤 -光碟機 光驅 -入口網站 門戶網站 -內建 內置 -內碼表 代碼頁 -全形 全角 -全球資訊網 萬維網 -函數語言程式設計 函數式編程 -刀鋒伺服器 刀片服務器 -分散式 分佈式 -分時多工 時分複用 -分時多重進接 時分多址 -分碼多重進接 碼分多址 -分空間多重進接 空分多址 -分頻多工 頻分複用 -分頻多重進接 頻分多址 -列印 打印 -列舉 枚舉 -剪下 剪切 -剪貼簿 剪貼板 -副檔名 擴展名 文件擴展名 -匯入 導入 -匯出 導出 -匯流排 總線 -區域網 局域網 -半形 半角 -印表機 打印机 -原始檔 源文件 -原始碼 源代碼 -原生代碼 本地代碼 -參數列 參數表 -取樣 採樣 -取樣率 采样率 -名稱空間 命名空間 -向量 矢量 -呼叫 
調用 -命令列 命令行 -啟用 激活 -單核心 宏內核 -回撥 回調 -圖示 圖標 -地址 地址 -埠 端口 -執行緒 線程 -執行長 首席執行官 -壁紙 壁紙 -外來鍵 外鍵 -外接 外置 -多型 多態 -多執行緒 多線程 -多工 多任務 -太空梭 航天飛機 -字元 字符 -字型 字體 -字型檔 字庫 -字尾 後綴 -字符集 字符集 -字首 前綴 -存取 訪問 -存檔 存盤 -宕機 死機 -定址 尋址 -實例 實例 -實體地址 物理地址 -實體記憶體 物理内存 -寬頻 寬帶 -對映 映射 -對話方塊 對話框 -對象 對象 -巢狀 嵌套 -工作列 任務欄 -工作管理員 任務管理器 -平行計算 並行計算 -序列 串行 -序號產生器 註冊機 -建構函式 構造函數 -彙編 彙編 -影印 複印 -影象 圖像 -後設資料 元數據 -循環 循環 -微控制器 單片機 -快取 緩存 -快取記憶體 高速緩存 -快閃記憶體 閃存 -感測 傳感 -打開 打開 -技術長 首席技術官 -指令式程式設計 命令式編程 -指令碼 腳本 -掃描器 掃描儀 -排程 調度 -控制代碼 句柄 -控制項 控件 -摺積 捲積 -擷取 截取 -攜帶型 便攜式 -支持者 支持者 -支援 支持 -效能 性能 -整合 集成 -數位 數字 -數位印刷 數字印刷 -數位電子 數字電子 -數位電路 數字電路 -數字 數字 -數據機 調製解調器 -文件 文檔 -文字 文本 -文書處理 文字處理 -映象 鏡像 -映象管 顯像管 -時脈頻率 時鐘頻率 -晶片 芯片 -智慧 智能 -智慧財產權 知識產權 -有失真壓縮 有損壓縮 -核心 內核 -桌上型 桌面型 -桌上型電腦 臺式機 -桌布 壁紙 -標頭檔案 頭文件 -模擬 仿真 模擬 -模組 模塊 -檔名 文件名 -檔案 文件 -檢視 查看 視圖 -欄位 字段 -正規化 範式 -正規表示式 正則表達式 -氣泡跑需 冒泡排序 -永續性 持久性 -波長分波多工 波分複用 -消息 消息 -游標 光標 -溢位 溢出 -滑鼠 鼠標 -演算法 算法 -無失真壓縮 無損壓縮 -燒錄 刻錄 -營運長 首席運營官 -物件 對象 -物件導向 面向對象 -狀態列 狀態欄 -畫素 像素 -登入 登錄 -登出 註銷 -目的碼 目標代碼 -直譯器 解釋器 -相容 兼容 -相簿 相冊 -真實模式 實模式 -硬碟 硬盤 -硬體 硬件 -碟片 盤片 -碟符 盤符 -磁碟 磁盤 -磁軌 磁道 -社區 社區 -社羣 社區 -程序 進程 -程序導向 面向過程 -程序式程式設計 過程式編程 -程式 程序 -程式碼 代碼 -程式設計 編程 -程式設計師 程序員 -程式語言 編程語言 -稽覈 審覈 -積體電路 集成電路 -空間多工 空分複用 -簡報 演示文稿 -簡訊 短信 -粘貼 粘貼 -終端使用者 最終用戶 -組合語言 彙編語言 -組譯 彙編 -網咖 網吧 -網路 網絡 -網路上的芳鄰 網上鄰居 -網際網路 互聯網 -線上 在線 -縮圖 縮略圖 -縮排 縮進 -繫結 綁定 -膝上型電腦 筆記本電腦 -菜單 菜單 -藍芽 藍牙 -虛擬函式 虛函數 -虛擬機器 虛擬機 -虛擬碼 僞代碼 -螢幕 屏幕 -行內函數 內聯函數 -行動式 便攜式 -行動硬碟 移動硬盤 -行動通訊 移動通信 -行動電話 移動電話 -表示式 表達式 -裝置 設備 -覈取按鈕 複選按鈕 -覈取方塊 複選框 -視窗 窗口 -視覺化 可視化 -視訊 視頻 -視訊記憶體 顯存 -解析度 分辨率 -解構函式 析構函數 -解除安裝 卸載 -觸控式螢幕 觸摸屏 -訊息 消息 -訊號 信號 -訊雜比 信噪比 -訪問 訪問 -設定 設置 -許可權 權限 -調色盤 調色板 -調變 調制 -變數 變量 -貼上 粘貼 -資料 數據 -資料來源 數據源 -資料倉儲 數據倉庫 -資料包 數據報 -資料夾 文件夾 -資料庫 數據庫 -資料探勘 數據挖掘 -資訊 信息 -資訊保安 信息安全 -資訊理論 信息論 -資訊科技 信息技術 -資訊長 首席信息官 -超程式設計 元編程 -軟碟機 軟驅 -軟體 軟件 -載入 加載 -載入程式 引導程序 -迴圈 循環 -通訊 通信 -通道 信道 -連結 鏈接 -連結串列 鏈表 -連線 連接 -進位制 進制 -進程 進程 -進階 高端 高級 -運算元 算子 -過載 重載 -遞迴 遞歸 -遠端 遠程 -遮蔽 屏蔽 -選單 菜單 -邏輯閘 邏輯門 -部落格 博客 -都會網路 城域王 -釋出 發佈 -重新命名 重命名 -重新整理 刷新 -重灌 重裝 -金氧半導體 金屬氧化物半導體 -金鑰 密鑰 -錄影 錄像 -鐳射 激光 -開啟 打開 -閘流體 晶閘管 -閘道器 網關 -閘電路 門電路 -關聯式資料庫 關係數據庫 
-防寫 寫保護 -防毒 殺毒 -陣列 數組 -雜湊 哈希 散列 -離線 脫機 -雲端儲存 雲存儲 -雲端計算 雲計算 -電腦保安 計算機安全 -電腦科學 計算機科學 -非同步 異步 -面板 皮膚 -音效卡 聲卡 -音訊 音頻 -頁尾 頁腳 -頁首 頁眉 -預設 缺省 -預設值 默認值 -頻寬 帶寬 -類别範本 類模板 -類比 模擬 -類比電子 模擬電子 -類比電路 模擬電路 -顯示卡 顯卡 -高級 高級 -高階 高端 高級 -高階函數 高階函數 -點選 點擊 -點陣圖 位圖 -隨機亂數 随机数 -型別 類型 -型別註釋 類型簽名 -純粹函數式程式語言 純函數式編程語言 -運算子 運算符 diff --git a/data/cn/cn-name.txt b/data/cn/cn-name.txt deleted file mode 100644 index 60aa9a3..0000000 --- a/data/cn/cn-name.txt +++ /dev/null @@ -1,81 +0,0 @@ -亞塞拜然 阿塞拜疆 -傅立葉 傅里葉 -克羅埃西亞 克羅地亞 -列支敦斯登 列支敦士登 -加彭 加蓬 -千里達及托巴哥 特立尼達和多巴哥 -卡達 卡塔爾 -厄利垂亞 厄立特里亞 -厄瓜多 厄瓜多爾 -史瓦濟蘭 斯威士蘭 -吉布地 吉布堤 -吉里巴斯 基里巴斯 -吐瓦魯 圖瓦盧 -哈薩克 哈萨克斯坦 -哥斯大黎加 哥斯達黎加 -喬治亞 格魯吉亞 -土庫曼 土庫曼斯坦 -坦尚尼亞 坦桑尼亞 -塔吉克 塔吉克斯坦 -塞席爾 塞舌爾 -塞普勒斯 塞浦路斯 -夏農 香農 -多明尼加 多米尼加 -奈及利亞 尼日利亞 -安地卡及巴布達 安提瓜和巴布達 -宏都拉斯 洪都拉斯 -寮國 老撾 -尚比亞 贊比亞 -尤拉 歐拉 -尼日 尼日爾 -巴布亞紐幾內亞 巴布亞新幾内亚 -巴貝多 巴巴多斯 -布吉納法索 布基納法索 -帛琉 帕劳 -幾內亞比索 幾內亞比紹 -快捷半導體 仙童半導體 -斯洛維尼亞 斯洛文尼亞 -查德 乍得 -格瑞那達 格林納達 -模里西斯 毛里求斯 -汶萊 文莱 -沙烏地阿拉伯 沙特阿拉伯 -波士尼亞赫塞哥維納 波斯尼亞黑塞哥維那 -波札那 博茨瓦納 -烏茲別克 烏茲別克斯坦 -獅子山 塞拉利昂 -瓜地馬拉 危地馬拉 -甘比亞 岡比亞 -盧安達 盧旺達 -突尼西亞 突尼斯 -紐西蘭 新西蘭 -索羅門羣島 所羅門羣島 -索馬利亞 索馬里 -維德角 佛得角 -義大利 意大利 -聖克里斯多福及尼維斯 聖基茨和尼維斯 -聖文森及格瑞那丁 聖文森特和格林納丁斯 -聖露西亞 聖盧西亞 -聖馬利諾 聖馬力諾 -肯亞 肯尼亞 -茅利塔尼亞 毛里塔尼亞 -莫三比克 莫桑比克 -萬那杜 瓦努阿圖 -葉門 也門 -葛摩 科摩羅 -蒲隆地 布隆迪 -蓋亞那 圭亞那 -蘇利南 蘇裏南 -衣索比亞 埃塞俄比亞 -諾魯 瑙魯 -象牙海岸 科特迪瓦 -貝南 貝寧 -貝里斯 伯利茲 -賴比瑞亞 利比里亞 -賴索托 萊索托 -辛巴威 津巴布韋 -迦納 加納 -那杜 溫納圖萬 -阿拉伯聯合大公國 阿拉伯聯合酋長國 -馬利共和國 馬里共和國 -馬爾地夫 馬爾代夫 diff --git a/data/cn/cn-other.txt b/data/cn/cn-other.txt deleted file mode 100644 index 49aa791..0000000 --- a/data/cn/cn-other.txt +++ /dev/null @@ -1,10 +0,0 @@ -乳酪 奶酪 -冷盤 凉菜 -子音 輔音 -母音 元音 -片語 詞組 -矽 硅 -笨豬跳 蹦极 -計程車 出租车 -賓士 奔馳 -速食麵 方便麵 \ No newline at end of file diff --git a/data/cn/merge.sh b/data/cn/merge.sh deleted file mode 100755 index 1b25c7c..0000000 --- a/data/cn/merge.sh +++ /dev/null @@ -1 +0,0 @@ -python ../scripts/merge.py cn-it.txt cn-other.txt cn-name.txt to_cn_phrases.txt diff --git a/data/cn/to_cn_phrases.txt b/data/cn/to_cn_phrases.txt deleted file mode 100644 index 8a41d38..0000000 --- a/data/cn/to_cn_phrases.txt +++ /dev/null @@ -1,429 
+0,0 @@ -PN接面 PN結 -SQL隱碼攻擊 SQL注入 SQL注入攻擊 -三極體 三極管 -下拉選單 下拉列表 -中介軟體 中間件 -串列埠 串口 串行端口 -主機板 主板 -主開機記錄 主引導記錄 -乙太網 以太網 -乳酪 奶酪 -二極體 二極管 -互動 交互 -互動式 交互式 -亞塞拜然 阿塞拜疆 -人工智慧 人工智能 -介面 接口 界面 -介面卡 適配器 -代碼 代碼 -伺服器 服務器 -佇列 隊列 -位元 比特 -位元率 比特率 -位元組 字節 -位元速率 碼率 -位址 地址 -位址列 地址欄 -低級 低級 -低階 低級 -作業系統 操作系統 -使用者 用戶 -來電轉駁 呼叫轉移 -例項 實例 -信號 信號 -偵錯 調試 -偵錯程式 調試器 -傅立葉 傅里葉 -傷心小棧 紅心大戰 -價效比 性價比 -優先順序 優先級 -儲存 保存 -元件 組件 -光碟 光盤 -光碟機 光驅 -克羅埃西亞 克羅地亞 -入口網站 門戶網站 -內建 內置 -內碼表 代碼頁 -全形 全角 -全球資訊網 萬維網 -冷盤 凉菜 -函數語言程式設計 函數式編程 -刀鋒伺服器 刀片服務器 -分散式 分佈式 -分時多工 時分複用 -分時多重進接 時分多址 -分碼多重進接 碼分多址 -分空間多重進接 空分多址 -分頻多工 頻分複用 -分頻多重進接 頻分多址 -列印 打印 -列支敦斯登 列支敦士登 -列舉 枚舉 -剪下 剪切 -剪貼簿 剪貼板 -副檔名 擴展名 文件擴展名 -加彭 加蓬 -匯入 導入 -匯出 導出 -匯流排 總線 -區域網 局域網 -千里達及托巴哥 特立尼達和多巴哥 -半形 半角 -卡達 卡塔爾 -印表機 打印机 -厄利垂亞 厄立特里亞 -厄瓜多 厄瓜多爾 -原始檔 源文件 -原始碼 源代碼 -原生代碼 本地代碼 -參數列 參數表 -取樣 採樣 -取樣率 采样率 -史瓦濟蘭 斯威士蘭 -吉布地 吉布堤 -吉里巴斯 基里巴斯 -名稱空間 命名空間 -吐瓦魯 圖瓦盧 -向量 矢量 -呼叫 調用 -命令列 命令行 -哈薩克 哈萨克斯坦 -哥斯大黎加 哥斯達黎加 -啟用 激活 -喬治亞 格魯吉亞 -單核心 宏內核 -回撥 回調 -圖示 圖標 -土庫曼 土庫曼斯坦 -地址 地址 -坦尚尼亞 坦桑尼亞 -型別 類型 -型別註釋 類型簽名 -埠 端口 -執行緒 線程 -執行長 首席執行官 -塔吉克 塔吉克斯坦 -塞席爾 塞舌爾 -塞普勒斯 塞浦路斯 -壁紙 壁紙 -夏農 香農 -外來鍵 外鍵 -外接 外置 -多型 多態 -多執行緒 多線程 -多工 多任務 -多明尼加 多米尼加 -太空梭 航天飛機 -奈及利亞 尼日利亞 -子音 輔音 -字元 字符 -字型 字體 -字型檔 字庫 -字尾 後綴 -字符集 字符集 -字首 前綴 -存取 訪問 -存檔 存盤 -安地卡及巴布達 安提瓜和巴布達 -宏都拉斯 洪都拉斯 -宕機 死機 -定址 尋址 -實例 實例 -實體地址 物理地址 -實體記憶體 物理内存 -寬頻 寬帶 -寮國 老撾 -對映 映射 -對話方塊 對話框 -對象 對象 -尚比亞 贊比亞 -尤拉 歐拉 -尼日 尼日爾 -巢狀 嵌套 -工作列 任務欄 -工作管理員 任務管理器 -巴布亞紐幾內亞 巴布亞新幾内亚 -巴貝多 巴巴多斯 -布吉納法索 布基納法索 -帛琉 帕劳 -平行計算 並行計算 -幾內亞比索 幾內亞比紹 -序列 串行 -序號產生器 註冊機 -建構函式 構造函數 -彙編 彙編 -影印 複印 -影象 圖像 -後設資料 元數據 -循環 循環 -微控制器 單片機 -快取 緩存 -快取記憶體 高速緩存 -快捷半導體 仙童半導體 -快閃記憶體 閃存 -感測 傳感 -打開 打開 -技術長 首席技術官 -指令式程式設計 命令式編程 -指令碼 腳本 -掃描器 掃描儀 -排程 調度 -控制代碼 句柄 -控制項 控件 -摺積 捲積 -擷取 截取 -攜帶型 便攜式 -支持者 支持者 -支援 支持 -效能 性能 -整合 集成 -數位 數字 -數位印刷 數字印刷 -數位電子 數字電子 -數位電路 數字電路 -數字 數字 -數據機 調製解調器 -文件 文檔 -文字 文本 -文書處理 文字處理 -斯洛維尼亞 斯洛文尼亞 -映象 鏡像 -映象管 顯像管 -時脈頻率 時鐘頻率 -晶片 芯片 -智慧 智能 -智慧財產權 知識產權 -有失真壓縮 有損壓縮 -查德 乍得 -核心 內核 -格瑞那達 格林納達 -桌上型 桌面型 -桌上型電腦 臺式機 -桌布 壁紙 -標頭檔案 頭文件 -模擬 仿真 模擬 -模組 模塊 -模里西斯 毛里求斯 -檔名 文件名 -檔案 文件 -檢視 查看 視圖 -欄位 字段 -正規化 範式 -正規表示式 正則表達式 -母音 元音 -氣泡跑需 冒泡排序 -永續性 
持久性 -汶萊 文莱 -沙烏地阿拉伯 沙特阿拉伯 -波士尼亞赫塞哥維納 波斯尼亞黑塞哥維那 -波札那 博茨瓦納 -波長分波多工 波分複用 -消息 消息 -游標 光標 -溢位 溢出 -滑鼠 鼠標 -演算法 算法 -烏茲別克 烏茲別克斯坦 -無失真壓縮 無損壓縮 -燒錄 刻錄 -營運長 首席運營官 -片語 詞組 -物件 對象 -物件導向 面向對象 -狀態列 狀態欄 -獅子山 塞拉利昂 -瓜地馬拉 危地馬拉 -甘比亞 岡比亞 -畫素 像素 -登入 登錄 -登出 註銷 -盧安達 盧旺達 -目的碼 目標代碼 -直譯器 解釋器 -相容 兼容 -相簿 相冊 -真實模式 實模式 -矽 硅 -硬碟 硬盤 -硬體 硬件 -碟片 盤片 -碟符 盤符 -磁碟 磁盤 -磁軌 磁道 -社區 社區 -社羣 社區 -程序 進程 -程序導向 面向過程 -程序式程式設計 過程式編程 -程式 程序 -程式碼 代碼 -程式設計 編程 -程式設計師 程序員 -程式語言 編程語言 -稽覈 審覈 -積體電路 集成電路 -空間多工 空分複用 -突尼西亞 突尼斯 -笨豬跳 蹦极 -簡報 演示文稿 -簡訊 短信 -粘貼 粘貼 -紐西蘭 新西蘭 -純粹函數式程式語言 純函數式編程語言 -索羅門羣島 所羅門羣島 -索馬利亞 索馬里 -終端使用者 最終用戶 -組合語言 彙編語言 -組譯 彙編 -維德角 佛得角 -網咖 網吧 -網路 網絡 -網路上的芳鄰 網上鄰居 -網際網路 互聯網 -線上 在線 -縮圖 縮略圖 -縮排 縮進 -繫結 綁定 -義大利 意大利 -聖克里斯多福及尼維斯 聖基茨和尼維斯 -聖文森及格瑞那丁 聖文森特和格林納丁斯 -聖露西亞 聖盧西亞 -聖馬利諾 聖馬力諾 -肯亞 肯尼亞 -膝上型電腦 筆記本電腦 -茅利塔尼亞 毛里塔尼亞 -莫三比克 莫桑比克 -菜單 菜單 -萬那杜 瓦努阿圖 -葉門 也門 -葛摩 科摩羅 -蒲隆地 布隆迪 -蓋亞那 圭亞那 -藍芽 藍牙 -蘇利南 蘇裏南 -虛擬函式 虛函數 -虛擬機器 虛擬機 -虛擬碼 僞代碼 -螢幕 屏幕 -行內函數 內聯函數 -行動式 便攜式 -行動硬碟 移動硬盤 -行動通訊 移動通信 -行動電話 移動電話 -衣索比亞 埃塞俄比亞 -表示式 表達式 -裝置 設備 -覈取按鈕 複選按鈕 -覈取方塊 複選框 -視窗 窗口 -視覺化 可視化 -視訊 視頻 -視訊記憶體 顯存 -解析度 分辨率 -解構函式 析構函數 -解除安裝 卸載 -觸控式螢幕 觸摸屏 -計程車 出租车 -訊息 消息 -訊號 信號 -訊雜比 信噪比 -訪問 訪問 -設定 設置 -許可權 權限 -調色盤 調色板 -調變 調制 -諾魯 瑙魯 -變數 變量 -象牙海岸 科特迪瓦 -貝南 貝寧 -貝里斯 伯利茲 -貼上 粘貼 -資料 數據 -資料來源 數據源 -資料倉儲 數據倉庫 -資料包 數據報 -資料夾 文件夾 -資料庫 數據庫 -資料探勘 數據挖掘 -資訊 信息 -資訊保安 信息安全 -資訊理論 信息論 -資訊科技 信息技術 -資訊長 首席信息官 -賓士 奔馳 -賴比瑞亞 利比里亞 -賴索托 萊索托 -超程式設計 元編程 -軟碟機 軟驅 -軟體 軟件 -載入 加載 -載入程式 引導程序 -辛巴威 津巴布韋 -迦納 加納 -迴圈 循環 -通訊 通信 -通道 信道 -速食麵 方便麵 -連結 鏈接 -連結串列 鏈表 -連線 連接 -進位制 進制 -進程 進程 -進階 高端 高級 -運算元 算子 -運算子 運算符 -過載 重載 -遞迴 遞歸 -遠端 遠程 -遮蔽 屏蔽 -選單 菜單 -邏輯閘 邏輯門 -那杜 溫納圖萬 -部落格 博客 -都會網路 城域王 -釋出 發佈 -重新命名 重命名 -重新整理 刷新 -重灌 重裝 -金氧半導體 金屬氧化物半導體 -金鑰 密鑰 -錄影 錄像 -鐳射 激光 -開啟 打開 -閘流體 晶閘管 -閘道器 網關 -閘電路 門電路 -關聯式資料庫 關係數據庫 -防寫 寫保護 -防毒 殺毒 -阿拉伯聯合大公國 阿拉伯聯合酋長國 -陣列 數組 -隨機亂數 随机数 -雜湊 哈希 散列 -離線 脫機 -雲端儲存 雲存儲 -雲端計算 雲計算 -電腦保安 計算機安全 -電腦科學 計算機科學 -非同步 異步 -面板 皮膚 -音效卡 聲卡 -音訊 音頻 -頁尾 頁腳 -頁首 頁眉 -預設 缺省 -預設值 默認值 -頻寬 帶寬 -類别範本 類模板 -類比 模擬 -類比電子 模擬電子 -類比電路 模擬電路 -顯示卡 顯卡 -馬利共和國 馬里共和國 -馬爾地夫 馬爾代夫 -高級 高級 -高階 高端 高級 -高階函數 高階函數 -點選 點擊 -點陣圖 位圖 diff --git a/data/config/hk2s.json 
b/data/config/hk2s.json new file mode 100644 index 0000000..ab01ca8 --- /dev/null +++ b/data/config/hk2s.json @@ -0,0 +1,33 @@ +{ + "name": "Traditional Chinese (Hong Kong standard) to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "TSPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "HKVariantsRevPhrases.ocd" + }, { + "type": "ocd", + "file": "HKVariantsRev.ocd" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TSPhrases.ocd" + }, { + "type": "ocd", + "file": "TSCharacters.ocd" + }] + } + }] +} diff --git a/data/config/mix2zhs.ini b/data/config/mix2zhs.ini deleted file mode 100644 index 4984243..0000000 --- a/data/config/mix2zhs.ini +++ /dev/null @@ -1,21 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2010-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
- -title = mix_to_simp -description = Standard Configuration for Conversion from Simplified-Traditional-Mixed Chinese to Simplified Chinese -dict0 = OCD simp_to_trad_characters.ocd -dict1 = OCD trad_to_simp_phrases.ocd -dict1 = OCD trad_to_simp_characters.ocd diff --git a/data/config/mix2zht.ini b/data/config/mix2zht.ini deleted file mode 100644 index 875796f..0000000 --- a/data/config/mix2zht.ini +++ /dev/null @@ -1,21 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2010-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
- -title = mix_to_trad -description = Standard Configuration for Conversion from Simplified-Traditional-Mixed Chinese to Traditional Chinese -dict0 = OCD trad_to_simp_characters.ocd -dict1 = OCD simp_to_trad_phrases.ocd -dict1 = OCD simp_to_trad_characters.ocd diff --git a/data/config/s2hk.json b/data/config/s2hk.json new file mode 100644 index 0000000..9485440 --- /dev/null +++ b/data/config/s2hk.json @@ -0,0 +1,33 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Hong Kong standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "STPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "STPhrases.ocd" + }, { + "type": "ocd", + "file": "STCharacters.ocd" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "HKVariantsPhrases.ocd" + }, { + "type": "ocd", + "file": "HKVariants.ocd" + }] + } + }] +} diff --git a/data/config/s2t.json b/data/config/s2t.json new file mode 100644 index 0000000..de1fad2 --- /dev/null +++ b/data/config/s2t.json @@ -0,0 +1,22 @@ +{ + "name": "Simplified Chinese to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "STPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "STPhrases.ocd" + }, { + "type": "ocd", + "file": "STCharacters.ocd" + }] + } + }] +} diff --git a/data/config/s2tw.json b/data/config/s2tw.json new file mode 100644 index 0000000..5fc6afe --- /dev/null +++ b/data/config/s2tw.json @@ -0,0 +1,27 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "STPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "STPhrases.ocd" + }, { + "type": "ocd", + "file": "STCharacters.ocd" + }] + } + }, { + "dict": { + "type": 
"ocd", + "file": "TWVariants.ocd" + } + }] +} diff --git a/data/config/s2twp.json b/data/config/s2twp.json new file mode 100644 index 0000000..6a7f881 --- /dev/null +++ b/data/config/s2twp.json @@ -0,0 +1,32 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "STPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "STPhrases.ocd" + }, { + "type": "ocd", + "file": "STCharacters.ocd" + }] + } + }, { + "dict": { + "type": "ocd", + "file": "TWPhrases.ocd" + } + }, { + "dict": { + "type": "ocd", + "file": "TWVariants.ocd" + } + }] +} diff --git a/data/config/t2s.json b/data/config/t2s.json new file mode 100644 index 0000000..21ba6e4 --- /dev/null +++ b/data/config/t2s.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "TSPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TSPhrases.ocd" + }, { + "type": "ocd", + "file": "TSCharacters.ocd" + }] + } + }] +} diff --git a/data/config/tw2s.json b/data/config/tw2s.json new file mode 100644 index 0000000..7c772e7 --- /dev/null +++ b/data/config/tw2s.json @@ -0,0 +1,33 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "TSPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TWVariantsRevPhrases.ocd" + }, { + "type": "ocd", + "file": "TWVariantsRev.ocd" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TSPhrases.ocd" + }, { + "type": "ocd", + "file": "TSCharacters.ocd" + }] + } + }] +} diff --git a/data/config/tw2sp.json b/data/config/tw2sp.json new file mode 100644 
index 0000000..86078fe --- /dev/null +++ b/data/config/tw2sp.json @@ -0,0 +1,38 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd", + "file": "TSPhrases.ocd" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TWVariantsRevPhrases.ocd" + }, { + "type": "ocd", + "file": "TWVariantsRev.ocd" + }] + } + }, { + "dict": { + "type": "ocd", + "file": "TWPhrasesRev.ocd" + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd", + "file": "TSPhrases.ocd" + }, { + "type": "ocd", + "file": "TSCharacters.ocd" + }] + } + }] +} diff --git a/data/config/zhs2zht.ini b/data/config/zhs2zht.ini deleted file mode 100644 index 3b69d38..0000000 --- a/data/config/zhs2zht.ini +++ /dev/null @@ -1,20 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2010-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
- -title = simp_to_trad -description = Standard Configuration for Conversion from Simplified Chinese to Traditional Chinese -dict0 = OCD simp_to_trad_phrases.ocd -dict0 = OCD simp_to_trad_characters.ocd diff --git a/data/config/zhs2zhtw_p.ini b/data/config/zhs2zhtw_p.ini deleted file mode 100644 index 2d834b1..0000000 --- a/data/config/zhs2zhtw_p.ini +++ /dev/null @@ -1,21 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = simp_to_taiwan_phrases -description = from Simplified to phrases of Taiwan -dict0 = OCD simp_to_trad_phrases.ocd -dict0 = OCD simp_to_trad_characters.ocd -dict1 = TEXT to_tw_phrases.txt \ No newline at end of file diff --git a/data/config/zhs2zhtw_v.ini b/data/config/zhs2zhtw_v.ini deleted file mode 100644 index ca7f02f..0000000 --- a/data/config/zhs2zhtw_v.ini +++ /dev/null @@ -1,21 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-; See the License for the specific language governing permissions and -; limitations under the License. - -title = simp_to_taiwan_variants -description = from Simplified to variants of Taiwan -dict0 = OCD simp_to_trad_phrases.ocd -dict0 = OCD simp_to_trad_characters.ocd -dict1 = TEXT to_tw_variants.txt \ No newline at end of file diff --git a/data/config/zhs2zhtw_vp.ini b/data/config/zhs2zhtw_vp.ini deleted file mode 100644 index bd103d6..0000000 --- a/data/config/zhs2zhtw_vp.ini +++ /dev/null @@ -1,22 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = simp_to_taiwan_variants_phrases -description = from Simplified to variants and phrases of Taiwan -dict0 = OCD simp_to_trad_phrases.ocd -dict0 = OCD simp_to_trad_characters.ocd -dict1 = TEXT to_tw_phrases.txt -dict1 = TEXT to_tw_variants.txt \ No newline at end of file diff --git a/data/config/zht2zhs.ini b/data/config/zht2zhs.ini deleted file mode 100644 index fd9e78d..0000000 --- a/data/config/zht2zhs.ini +++ /dev/null @@ -1,20 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2010-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. 
-; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_simp -description = Standard Configuration for Conversion from Traditional Chinese to Simplified Chinese -dict0 = OCD trad_to_simp_phrases.ocd -dict0 = OCD trad_to_simp_characters.ocd diff --git a/data/config/zht2zhtw_p.ini b/data/config/zht2zhtw_p.ini deleted file mode 100644 index d5039e4..0000000 --- a/data/config/zht2zhtw_p.ini +++ /dev/null @@ -1,19 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_phrases -description = from Traditional to phrases of Taiwan -dict0 = TEXT to_tw_phrases.txt \ No newline at end of file diff --git a/data/config/zht2zhtw_v.ini b/data/config/zht2zhtw_v.ini deleted file mode 100644 index e1a3fca..0000000 --- a/data/config/zht2zhtw_v.ini +++ /dev/null @@ -1,19 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. 
-; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_variants -description = from Traditional to variants of Taiwan -dict0 = TEXT to_tw_variants.txt \ No newline at end of file diff --git a/data/config/zht2zhtw_vp.ini b/data/config/zht2zhtw_vp.ini deleted file mode 100644 index a1ae066..0000000 --- a/data/config/zht2zhtw_vp.ini +++ /dev/null @@ -1,20 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_variants_phrases -description = from Traditional to variants and phrases of Taiwan -dict0 = TEXT to_tw_phrases.txt -dict0 = TEXT to_tw_variants.txt \ No newline at end of file diff --git a/data/config/zhtw2zhcn_s.ini b/data/config/zhtw2zhcn_s.ini deleted file mode 100644 index 44ea0c5..0000000 --- a/data/config/zhtw2zhcn_s.ini +++ /dev/null @@ -1,23 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. 
-; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_variants_phrases -description = from Taiwan to China phrases (Simplified) -dict0 = TEXT from_tw_phrases.txt -dict0 = TEXT from_tw_variants.txt -dict1 = TEXT to_cn_phrases.txt -dict2 = OCD trad_to_simp_phrases.ocd -dict2 = OCD trad_to_simp_characters.ocd \ No newline at end of file diff --git a/data/config/zhtw2zhcn_t.ini b/data/config/zhtw2zhcn_t.ini deleted file mode 100644 index e6fed3b..0000000 --- a/data/config/zhtw2zhcn_t.ini +++ /dev/null @@ -1,21 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
- -title = trad_to_taiwan_variants_phrases -description = from Taiwan to China phrases (Traditional) -dict0 = TEXT from_tw_phrases.txt -dict0 = TEXT from_tw_variants.txt -dict1 = TEXT to_cn_phrases.txt \ No newline at end of file diff --git a/data/config/zhtw2zhs.ini b/data/config/zhtw2zhs.ini deleted file mode 100644 index bde20a7..0000000 --- a/data/config/zhtw2zhs.ini +++ /dev/null @@ -1,22 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_variants_phrases -description = from Taiwan to Simplified -dict0 = TEXT from_tw_phrases.txt -dict0 = TEXT from_tw_variants.txt -dict1 = OCD trad_to_simp_phrases.ocd -dict1 = OCD trad_to_simp_characters.ocd \ No newline at end of file diff --git a/data/config/zhtw2zht.ini b/data/config/zhtw2zht.ini deleted file mode 100644 index 387d8a3..0000000 --- a/data/config/zhtw2zht.ini +++ /dev/null @@ -1,20 +0,0 @@ -; Open Chinese Convert -; -; Copyright 2011-2013 BYVoid -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-; See the License for the specific language governing permissions and -; limitations under the License. - -title = trad_to_taiwan_variants_phrases -description = from Taiwan to Traditional -dict0 = TEXT from_tw_phrases.txt -dict0 = TEXT from_tw_variants.txt \ No newline at end of file diff --git a/data/dictionary/HKVariants.txt b/data/dictionary/HKVariants.txt new file mode 100644 index 0000000..73c2c5b --- /dev/null +++ b/data/dictionary/HKVariants.txt @@ -0,0 +1,62 @@ +僞 偽 +兌 兑 +冑 胄 +冗 宂 +勳 勛 +啓 啟 +嘆 歎 +囪 囱 +妝 粧 +媼 媪 +嬀 媯 +岩 巖 +悅 悦 +慍 愠 +戶 户 +挩 捝 +搵 揾 +擡 抬 +敓 敚 +敘 敍 +柺 枴 +梲 棁 +棱 稜 +榲 榅 +檯 枱 +氳 氲 +涌 湧 +涗 涚 +溫 温 +溼 濕 +潙 溈 +潨 潀 +熅 煴 +爲 為 +痹 痺 +癡 痴 +稅 税 +竈 灶 +糉 粽 +縕 緼 +纔 才 +脫 脱 +膃 腽 +臥 卧 +臺 台 +蒕 蒀 +蔥 葱 +蔿 蒍 +蘊 藴 +蛻 蜕 +衆 眾 +衛 衞 +覈 核 +說 説 +贗 贋 +踊 踴 +轀 輼 +醞 醖 +鉤 鈎 +銳 鋭 +閱 閲 +鰮 鰛 diff --git a/data/dictionary/HKVariantsPhrases.txt b/data/dictionary/HKVariantsPhrases.txt new file mode 100644 index 0000000..f73f0a2 --- /dev/null +++ b/data/dictionary/HKVariantsPhrases.txt @@ -0,0 +1,17 @@ +南涌 南涌 +大欖涌 大欖涌 +大涌 大涌 +東涌 東涌 +沙河涌 沙河涌 +沙魚涌 沙魚涌 +河涌 河涌 +泥涌 泥涌 +涌尾 涌尾 +深涌 深涌 +溪涌 溪涌 +葵涌 葵涌 +蠔涌 蠔涌 +西涌 西涌 +鰂魚涌 鰂魚涌 +麻涌 麻涌 +黎涌 黎涌 diff --git a/data/dictionary/HKVariantsRevPhrases.txt b/data/dictionary/HKVariantsRevPhrases.txt new file mode 100644 index 0000000..b8a1dc1 --- /dev/null +++ b/data/dictionary/HKVariantsRevPhrases.txt @@ -0,0 +1,139 @@ +七星巖 七星巖 +世胄 世胄 +介胄 介冑 +傅巖 傅巖 +免胄 免冑 +冠胄 冠冑 +千巖競秀 千巖競秀 +千巖萬壑 千巖萬壑 +千巖萬谷 千巖萬谷 +台山 台山 +台州 台州 +台州地區 台州地區 +台州市 台州市 +名胄 名胄 +國胄 國胄 +圍巖 圍巖 +地胄 地胄 +壓胄子 壓冑子 +士胄 士胄 +大巖桐 大巖桐 +天台女 天台女 +天台宗 天台宗 +天台山 天台山 +天台縣 天台縣 +天潢貴胄 天潢貴胄 +奇巖 奇巖 +寶胄 寶胄 +小巖洞 小巖洞 +岫巖縣 岫巖縣 +峯巖 峯巖 +嵌巖 嵌巖 +巉巖 巉巖 +巖壁 巖壁 +巖居 巖居 +巖居穴處 巖居穴處 +巖居谷飲 巖居谷飲 +巖岸 巖岸 +巖巉 巖巉 +巖巖 巖巖 +巖徼 巖徼 +巖手縣 巖手縣 +巖村 巖村 +巖洞 巖洞 +巖流圈 巖流圈 +巖牆 巖牆 +巖牆之下 巖牆之下 +巖畫 巖畫 +巖穴 巖穴 +巖穴之士 巖穴之士 +巖薔薇 巖薔薇 +巖邑 巖邑 +巖郎 巖郎 +巖阻 巖阻 +巖陛 巖陛 +帝胄 帝胄 +幽巖 幽巖 +幽棲巖谷 幽棲巖谷 +懸巖 懸巖 +懸巖峭壁 懸巖峭壁 +懸胄 懸冑 +攀巖 攀巖 +支胄 支胄 +教胄 教胄 +景胄 景胄 +望胄 望胄 +末胄 末胄 +村胄 村胄 +枕巖漱流 枕巖漱流 +枝胄 枝胄 +氏胄 氏胄 +洪胄 洪胄 +浙江天台縣 浙江天台縣 +清胄 清胄 +灰巖殘丘 灰巖殘丘 +玄胄 玄胄 +甲胄 甲冑 +甲胄魚類 甲冑魚類 +皇胄 皇胄 +石灰巖洞 石灰巖洞 +神胄 神胄 
+簪纓世胄 簪纓世胄 +系胄 系胄 +紅巖 紅巖 +絕巖 絕巖 +緒胄 緒胄 +纂胄 纂胄 +胄嗣 胄嗣 +胄子 胄子 +胄序 胄序 +胄族 胄族 +胄甲 冑甲 +胄監 胄監 +胄科 冑科 +胄緒 胄緒 +胄胤 胄胤 +胄裔 胄裔 +胄裔繁衍 胄裔繁衍 +胄閥 胄閥 +胡雪巖 胡雪巖 +胤胄 胤胄 +苗胄 苗胄 +英胄 英胄 +華胄 華胄 +血胄 血胄 +裔胄 裔胄 +訓胄 訓胄 +試胄 試胄 +豪門貴胄 豪門貴胄 +貝胄 貝冑 +貴胄 貴胄 +賢胄 賢胄 +躬擐甲胄 躬擐甲冑 +遐胄 遐胄 +遙胄 遙胄 +遙遙華胄 遙遙華胄 +遠胄 遠胄 +遺胄 遺胄 +重巖疊嶂 重巖疊嶂 +金胄 金胄 +鎧胄 鎧冑 +鑿巖 鑿巖 +門胄 門胄 +雲巖區 雲巖區 +非層巖 非層巖 +韓侂胄 韓侂冑 +飮胄 飮冑 +骨巖巖 骨巖巖 +高胄 高胄 +魚胄 魚冑 +鮮胄 鮮胄 +鴻胄 鴻胄 +黃巖區 黃巖區 +黃巖島 黃巖島 +黃炎貴胄 黃炎貴胄 +齒胄 齒胄 +龍巖 龍巖 +龍巖市 龍巖市 +龍巖村 龍巖村 +龍胄 龍胄 diff --git a/data/jp/to_jp_variants.txt b/data/dictionary/JPVariants.txt similarity index 100% rename from data/jp/to_jp_variants.txt rename to data/dictionary/JPVariants.txt diff --git a/data/simp_to_trad/characters.txt b/data/dictionary/STCharacters.txt similarity index 81% rename from data/simp_to_trad/characters.txt rename to data/dictionary/STCharacters.txt index 5b8a6ef..f02fb46 100644 --- a/data/simp_to_trad/characters.txt +++ b/data/dictionary/STCharacters.txt @@ -1,8 +1,12 @@ +㐷 傌 +㐹 㑶 㐹 㐽 偑 +㑇 㑳 㑈 倲 㑔 㑯 㑩 儸 㓥 劏 +㓰 劃 㔉 劚 㖊 噚 㖞 喎 @@ -23,6 +27,7 @@ 㧟 擓 㧰 擽 㨫 㩜 +㭎 棡 㭏 椲 㭣 𣙎 㭤 樢 @@ -35,6 +40,7 @@ 㳠 澾 㳡 濄 㳢 𣾷 +㳽 瀰 㶉 鸂 㶶 燶 㶽 煱 @@ -46,6 +52,7 @@ 䁖 瞜 䂵 碽 䅉 稏 +䅟 穇 䅪 𥢢 䇲 筴 䉤 籔 @@ -58,10 +65,14 @@ 䌼 綐 䌽 綵 䌾 䋻 +䌿 䋹 䍀 繿 䍁 繸 +䎬 䎱 +䏝 膞 䑽 𦪙 䓕 薳 +䓖 藭 䗖 螮 䘛 𧝞 䘞 𧜗 @@ -69,8 +80,9 @@ 䙌 䙡 䙓 襬 䜣 訢 +䜤 鿁 䜥 𧩙 -䜧 譅 +䜧 䜀 䜩 讌 䝙 貙 䞌 𧵳 @@ -110,8 +122,8 @@ 䴕 鴷 䴖 鶄 䴗 鶪 -䴘 鷈 -䴙 鷿 +䴘 鷈 鷉 +䴙 鷿 鸊 䶮 龑 万 萬 万 与 與 @@ -287,7 +299,7 @@ 劲 勁 劳 勞 势 勢 -勋 勳 +勋 勳 勛 勚 勩 匀 勻 匦 匭 @@ -329,6 +341,7 @@ 叁 叄 参 參 蔘 叆 靉 +叇 靆 双 雙 发 發 髮 变 變 @@ -395,7 +408,7 @@ 啧 嘖 啬 嗇 啭 囀 -啮 齧 +啮 齧 嚙 啯 嘓 啰 囉 啴 嘽 @@ -449,7 +462,7 @@ 垲 塏 垴 堖 埘 塒 -埙 塤 +埙 壎 塤 埚 堝 埯 垵 堑 塹 @@ -459,7 +472,7 @@ 声 聲 壳 殼 壶 壺 -壸 壺 +壸 壼 处 處 备 備 复 復 複 覆 @@ -492,7 +505,7 @@ 娘 娘 孃 娱 娛 娲 媧 -娴 嫻 +娴 嫺 嫻 婳 嫿 婴 嬰 婵 嬋 @@ -551,6 +564,7 @@ 岳 嶽 岳 岽 崬 岿 巋 +峃 嶨 峄 嶧 峡 峽 峣 嶢 @@ -601,6 +615,7 @@ 庞 龐 废 廢 庵 庵 菴 +庼 廎 廪 廩 开 開 异 異 @@ -773,6 +788,7 @@ 攒 攢 敌 敵 教 教 +敚 敓 敛 斂 敩 斆 数 數 @@ -788,6 +804,7 @@ 旸 暘 昆 昆 崑 昙 曇 +昵 暱 昼 晝 昽 曨 显 顯 @@ -867,7 +884,7 @@ 梿 槤 检 檢 棁 梲 -棂 欞 +棂 櫺 欞 棱 棱 椁 槨 椝 槼 @@ -877,6 +894,7 @@ 椤 欏 椫 樿 椭 橢 +椮 槮 楼 樓 榄 欖 榅 榲 
@@ -969,6 +987,7 @@ 浕 濜 涂 塗 涂 涌 涌 +涚 涗 涛 濤 涝 澇 涞 淶 @@ -1003,7 +1022,7 @@ 滗 潷 滚 滾 滞 滯 -滟 灩 +滟 灩 灧 滠 灄 满 滿 滢 瀅 @@ -1060,7 +1079,7 @@ 爱 愛 爷 爺 牍 牘 -牦 氂 +牦 犛 牵 牽 牺 犧 犊 犢 @@ -1088,6 +1107,7 @@ 献 獻 獭 獺 玑 璣 +玙 璵 玚 瑒 玛 瑪 玮 瑋 @@ -1105,6 +1125,7 @@ 琼 瓊 瑶 瑤 瑷 璦 +瑸 璸 璎 瓔 瓒 瓚 瓮 甕 @@ -1186,7 +1207,7 @@ 硚 礄 确 確 确 硵 磠 -硷 鹼 +硷 礆 碍 礙 碛 磧 碜 磣 @@ -1276,6 +1297,7 @@ 紧 緊 累 累 絷 縶 +緼 縕 纟 糹 纠 糾 纡 紆 @@ -1358,13 +1380,13 @@ 绮 綺 绯 緋 绰 綽 -绱 鞝 +绱 鞝 緔 绲 緄 绳 繩 维 維 绵 綿 绶 綬 -绷 繃 +绷 繃 綳 绸 綢 绹 綯 绺 綹 @@ -1389,6 +1411,7 @@ 缍 綞 缎 緞 缏 緶 +缐 線 缑 緱 缒 縋 缓 緩 @@ -1459,6 +1482,7 @@ 肿 腫 胀 脹 胁 脅 +胄 胄 冑 胆 膽 背 背 揹 胜 勝 胜 @@ -1498,7 +1522,7 @@ 舱 艙 舻 艫 艰 艱 -艳 豔 +艳 豔 艷 艺 藝 节 節 芈 羋 @@ -1560,6 +1584,7 @@ 莹 瑩 莺 鶯 莼 蓴 +萚 蘀 萝 蘿 萤 螢 营 營 @@ -1567,6 +1592,7 @@ 萧 蕭 萨 薩 葱 蔥 +蒀 蒕 蒇 蕆 蒉 蕢 蒋 蔣 @@ -1589,6 +1615,7 @@ 蕴 蘊 薮 藪 藓 蘚 +藴 蘊 蘖 櫱 虏 虜 虑 慮 @@ -1646,7 +1673,7 @@ 裢 褳 裣 襝 裤 褲 -裥 襇 +裥 襉 襇 褛 褸 褴 襤 襕 襴 @@ -1707,7 +1734,7 @@ 设 設 访 訪 诀 訣 -证 證 +证 證 証 诂 詁 诃 訶 评 評 @@ -1779,6 +1806,7 @@ 谆 諄 谇 誶 谈 談 +谉 讅 谊 誼 谋 謀 谌 諶 @@ -1890,7 +1918,7 @@ 赚 賺 赛 賽 赜 賾 -赝 贗 +赝 贗 贋 赞 贊 讚 赟 贇 赠 贈 @@ -1924,6 +1952,7 @@ 躏 躪 躜 躦 躯 軀 +輼 轀 车 車 轧 軋 轨 軌 @@ -2024,12 +2053,12 @@ 酂 酇 酝 醞 酦 醱 -酰 醯 酱 醬 酸 酸 痠 酽 釅 酾 釃 酿 釀 +醖 醞 采 採 采 寀 释 釋 里 裏 里 @@ -2062,13 +2091,13 @@ 钜 鉅 钝 鈍 钞 鈔 -钟 鍾 鐘 +钟 鍾 鐘 鈡 钠 鈉 钡 鋇 钢 鋼 钣 鈑 钤 鈐 -钥 鑰 +钥 鑰 鈅 钦 欽 钧 鈞 钨 鎢 @@ -2090,7 +2119,7 @@ 钸 鈽 钹 鈸 钺 鉞 -钻 鑽 +钻 鑽 鉆 钼 鉬 钽 鉭 钾 鉀 @@ -2174,7 +2203,7 @@ 锌 鋅 锍 鋶 锎 鐦 -锏 鐧 +锏 鐗 鐧 锐 銳 锑 銻 锒 鋃 @@ -2199,7 +2228,7 @@ 锥 錐 锦 錦 锧 鑕 -锨 杴 +锨 杴 鍁 锩 錈 锪 鍃 锫 錇 @@ -2234,7 +2263,7 @@ 镈 鎛 镉 鎘 镊 鑷 -镋 鎲 +镋 钂 鎲 镌 鐫 镍 鎳 镎 鎿 @@ -2257,7 +2286,7 @@ 镟 鏇 镠 鏐 镡 鐔 -镢 钁 +镢 钁 鐝 镣 鐐 镤 鏷 镥 鑥 @@ -2572,7 +2601,7 @@ 鲄 魺 鲅 鮁 鲆 鮃 -鲇 鯰 +鲇 鮎 鲈 鱸 鲉 鮋 鲊 鮓 @@ -2619,7 +2648,7 @@ 鲳 鯧 鲴 鯝 鲵 鯢 -鲶 鯰 +鲶 鮎 鯰 鲷 鯛 鲸 鯨 鲹 鰺 @@ -2630,7 +2659,7 @@ 鲾 鰏 鲿 鱨 鳀 鯷 -鳁 鰮 +鳁 鰮 鰛 鳂 鰃 鳃 鰓 鳄 鱷 @@ -2665,6 +2694,7 @@ 鳡 鱤 鳢 鱧 鳣 鱣 +鳤 䲘 鸟 鳥 鸠 鳩 鸡 雞 @@ -2705,7 +2735,7 @@ 鹄 鵠 鹅 鵝 鹆 鵒 -鹇 鷳 +鹇 鷳 鷴 鹈 鵜 鹉 鵡 鹊 鵲 @@ -2787,17 +2817,18 @@ 龚 龔 龛 龕 龟 龜 - 棡 𠆲 儣 𠆿 𠌥 𠉂 㒓 𠉗 𠏢 +𠊉 𣍐 𠚳 𠠎 𠛅 剾 𠛆 𠞆 𠮶 嗰 𠯟 哯 𠯠 噅 +𠱞 囃 𠲥 𡅏 𠴢 𡄔 𠵸 𡄣 @@ -2853,6 +2884,7 @@ 𤶧 𤸫 𤽯 㿧 𤾀 皟 +𤿲 麬 𥅘 𥌃 𥅴 䀹 𥆧 瞤 @@ -2901,12 +2933,16 @@ 𦟗 𦣎 
𦨩 𦪽 𦰴 䕳 +𦼖 檾 𧉞 䗿 𧒭 𧔥 +𧝧 𧟀 𧮪 詀 𧳕 𧳟 𧹑 䞈 +𧹒 買 𧹓 𧶔 +𧹔 賬 𧹕 䝻 𧹖 賟 𧹗 贃 @@ -2975,9 +3011,10 @@ 𩖕 𩓣 𩖖 顃 𩖗 䫴 +𩗡 䬞 𩙥 颰 𩙦 𩗀 -𩙧 𩗡 +𩙧 䬞 𩙨 𩘹 𩙩 𩘀 𩙪 颷 @@ -3019,12 +3056,14 @@ 𩧴 駩 𩧵 𩢴 𩧶 𩣏 +𩧸 𩣫 𩧺 駶 𩧻 𩣵 𩧼 𩣺 𩧿 䮠 𩨀 騔 𩨁 䮞 +𩨂 驄 𩨃 騝 𩨄 騪 𩨅 𩤸 @@ -3079,6 +3118,7 @@ 𪉐 𪃍 𪉑 鷔 𪉒 𪄕 +𪉓 𪈼 𪉔 𪄆 𪉕 𪇳 𪎈 䴬 @@ -3091,49 +3131,605 @@ 𪚏 𪘀 𪚐 𪘯 𪞝 凙 +𪟎 㔋 +𪟝 勣 +𪠀 𧷎 +𪠡 𠬙 +𪠳 唓 +𪠵 㖮 +𪠸 嚛 +𪠽 噹 +𪡀 嘺 +𪡃 嘪 +𪡋 噞 𪡏 嗹 +𪡞 嘳 +𪡺 𡃄 +𪢌 㘓 +𪢐 𡃤 +𪢒 𡂡 +𪢕 嚽 +𪢠 囒 𪢮 圞 +𪣆 埬 +𪣒 堚 +𪣻 塿 +𪤄 𡓁 +𪤚 壣 +𪥠 𧹈 +𪥫 孇 +𪥰 嬣 +𪥿 嬻 +𪧀 孾 +𪧘 寠 𪨊 㞞 𪨗 屩 +𪨧 崙 +𪨩 𡸗 +𪨶 輋 +𪨷 巗 +𪨹 𡹬 +𪩇 㟺 +𪩎 巊 +𪩸 幩 +𪪑 㢗 +𪪞 廧 +𪪴 𢍰 +𪫌 徿 +𪫡 𢤩 +𪫷 㦞 +𪫺 憸 +𪬚 𢣐 +𪬯 𢤿 +𪭝 𢯷 +𪭢 摐 +𪭧 擟 +𪭯 𢶒 +𪭵 掚 +𪭾 撊 +𪮃 㨻 +𪮋 㩋 +𪮖 撧 +𪮳 𢺳 +𪮶 攋 +𪯋 㪎 +𪰶 曊 +𪱥 膹 +𪱷 梖 +𪲎 櫅 +𪲔 欐 +𪲛 檵 +𪲮 櫠 +𪳍 欇 +𪴙 欑 +𪵑 毊 +𪵣 霼 +𪵱 濿 +𪶄 溡 +𪶒 𤄷 +𪶮 𣽏 +𪷍 㵾 +𪷽 灒 +𪸕 熂 +𪸩 煇 +𪹀 𤑹 +𪹠 𤓌 +𪹳 爥 +𪹹 𤒻 +𪺣 𤘀 +𪺪 𤜆 +𪺭 犞 +𪺷 獊 +𪺸 𤠮 +𪺻 㺜 +𪺽 猌 𪻐 瑽 +𪻨 瓄 +𪻲 瑻 +𪻺 璝 +𪼋 㻶 +𪼴 𤬅 +𪽝 𤳷 +𪽪 痮 +𪽭 𤷃 +𪽮 㿖 +𪽴 𤺔 +𪽷 瘱 +𪾔 盨 𪾢 睍 +𪾦 矑 +𪾸 矉 +𪿊 𥏝 +𪿞 𥖲 +𪿫 礮 +𪿵 𥗇 +𫀌 𥜰 +𫀓 𥜐 +𫀨 䅐 +𫀬 䅳 +𫀮 𥢷 +𫁂 䆉 +𫁟 竱 𫁡 鴗 +𫁲 䉑 +𫁳 𥯤 +𫁷 䉶 +𫁺 𥴼 +𫂃 簢 +𫂆 簂 𫂈 䉬 +𫂖 𥴨 +𫂿 𥻦 +𫃗 𩏷 +𫄙 糺 +𫄚 䊺 +𫄛 紟 +𫄜 䋃 +𫄝 𥾯 +𫄞 䋔 +𫄟 絁 +𫄠 絙 +𫄡 絧 +𫄢 絥 +𫄣 繷 +𫄤 繨 +𫄥 纚 +𫄦 𦀖 +𫄧 綖 𫄨 絺 +𫄩 䋦 +𫄪 𦅇 +𫄫 綟 +𫄬 緤 +𫄭 緮 +𫄮 䋼 +𫄯 𦃩 +𫄰 縍 +𫄱 繬 +𫄲 縸 +𫄳 縰 +𫄴 繂 +𫄵 𦅈 +𫄶 繈 +𫄷 繶 𫄸 纁 +𫄹 纗 +𫅅 䍤 +𫅗 羵 +𫅥 𦒀 +𫅭 䎙 +𫅼 𦔖 +𫆏 聻 +𫆝 𦟼 +𫆫 𦡝 +𫇘 𦧺 +𫇭 蔿 +𫇴 蒭 +𫇽 蕽 +𫈉 蕳 +𫈎 葝 +𫈟 蔯 +𫈵 蕝 +𫉁 薆 +𫉄 藷 +𫊪 䗅 +𫊮 蠦 +𫊸 蟜 +𫊹 𧒯 +𫊻 蟳 +𫋇 蟂 +𫋌 蟘 +𫋲 䙔 +𫋷 襗 +𫋹 襓 +𫋻 襘 𫌀 襀 +𫌇 襵 +𫌋 𧞫 𫌨 覼 +𫌪 覛 +𫌫 𧡴 +𫌬 𧢄 +𫌭 覹 +𫌯 䚩 𫍙 訑 +𫍚 訞 +𫍛 訜 +𫍜 詓 +𫍝 諫 +𫍞 𧦝 𫍟 𧦧 +𫍠 䛄 +𫍡 詑 𫍢 譊 +𫍣 詷 +𫍤 譑 +𫍥 誂 +𫍦 譨 +𫍧 誺 +𫍨 誫 +𫍩 諣 +𫍪 誋 +𫍫 䛳 +𫍬 誷 +𫍭 𧩕 +𫍮 誳 +𫍯 諴 𫍰 諰 +𫍱 諯 𫍲 謏 +𫍳 諥 +𫍴 謱 +𫍵 謸 +𫍶 𧩼 +𫍷 謉 +𫍸 謆 +𫍹 謯 +𫍺 𧫝 +𫍻 譆 +𫍼 𧬤 +𫍽 譞 +𫍾 𧭈 +𫍿 譾 +𫎆 豵 +𫎌 貗 +𫎦 贚 +𫎧 䝭 +𫎨 𧸘 +𫎩 賝 +𫎪 䞋 +𫎫 贉 +𫎬 贑 +𫎭 䞓 +𫎱 䟐 +𫎳 䟆 +𫎸 𧽯 +𫎺 䟃 +𫏃 䠆 +𫏆 蹳 𫏋 蹻 +𫏌 𨂐 +𫏐 蹔 +𫏑 𨇽 +𫏕 𨆪 +𫏞 𨇰 +𫏨 𨇤 𫐄 軏 +𫐅 軕 𫐆 轣 +𫐇 軜 +𫐈 軷 𫐉 軨 +𫐊 軬 +𫐋 𨎌 +𫐌 軿 +𫐍 𨌈 +𫐎 輢 +𫐏 輖 𫐐 輗 +𫐑 輨 +𫐒 輷 𫐓 輮 +𫐔 𨍰 +𫐕 轊 +𫐖 轇 +𫐗 轐 +𫐘 轗 +𫐙 轠 +𫐷 遱 +𫑘 鄟 +𫑡 鄳 +𫑷 醶 +𫓥 釟 +𫓦 釨 𫓧 鈇 +𫓨 鈛 𫓩 鏦 +𫓪 鈆 +𫓫 𨥟 +𫓬 鉔 +𫓭 鉠 +𫓮 𨪕 +𫓯 銈 +𫓰 銊 +𫓱 鐈 +𫓲 銁 +𫓳 𨰋 +𫓴 鉾 +𫓵 鋠 +𫓶 鋗 +𫓷 𫒡 +𫓸 錽 +𫓹 錤 +𫓺 鐪 +𫓻 錜 +𫓼 𨨛 +𫓽 錝 +𫓾 錥 +𫓿 𨨢 +𫔀 鍊 +𫔁 鐼 +𫔂 鍉 +𫔃 𨰲 +𫔄 鍒 +𫔅 鎍 +𫔆 䥯 +𫔇 鎞 +𫔈 鎙 +𫔉 𨰃 +𫔊 鏥 +𫔋 䥗 +𫔌 鏾 +𫔍 鐇 𫔎 鐍 +𫔏 𨬖 +𫔐 𨭸 +𫔑 𨭖 +𫔒 𨮳 +𫔓 𨯟 +𫔔 鑴 +𫔕 𨰥 +𫔭 開 +𫔮 閒 +𫔯 閗 +𫔰 閞 +𫔲 𨴹 +𫔴 閵 
+𫔵 䦯 +𫔶 闑 +𫔽 𨼳 +𫕚 𩀨 +𫕥 霣 +𫕨 𩅙 +𫖃 靧 +𫖅 䪊 +𫖇 鞾 +𫖑 𩎖 +𫖒 韠 +𫖓 𩏂 +𫖔 韛 +𫖕 韝 +𫖖 𩏠 +𫖫 䪴 +𫖬 䪾 +𫖭 𩒎 +𫖮 顗 +𫖯 頫 +𫖰 䫂 +𫖱 䫀 +𫖲 䫟 +𫖳 頵 +𫖴 𩔳 +𫖵 𩓥 +𫖶 顅 +𫖷 𩔑 +𫖸 願 +𫖹 顣 +𫖺 䫶 +𫗇 䫻 +𫗈 𩗓 +𫗉 𩗴 +𫗊 䬓 +𫗋 飋 +𫗚 𩟗 +𫗞 飦 +𫗟 䬧 𫗠 餦 +𫗡 𩚩 +𫗢 飵 +𫗣 飶 +𫗤 𩛌 +𫗥 餫 𫗦 餔 𫗧 餗 +𫗨 𩛡 +𫗩 饠 +𫗪 餧 +𫗫 餬 +𫗬 餪 +𫗭 餵 𫗮 餭 +𫗯 餱 +𫗰 䭔 +𫗱 䭑 +𫗳 𩝽 𫗴 饘 +𫗵 饟 +𫘛 馯 +𫘜 馼 𫘝 駃 +𫘞 駞 +𫘟 駊 +𫘠 駤 +𫘡 駫 𫘣 駻 𫘤 騃 +𫘥 騉 +𫘦 騊 +𫘧 騄 𫘨 騠 +𫘩 騜 +𫘪 騵 +𫘫 騴 +𫘬 騱 +𫘭 騻 +𫘮 䮰 +𫘯 驓 +𫘰 驙 +𫘱 驨 +𫘽 鬠 +𫙂 𩯁 𫚈 鱮 𫚉 魟 +𫚊 鰑 +𫚋 鱄 +𫚌 魦 +𫚍 魵 +𫚎 𩶁 +𫚏 䱁 +𫚐 䱀 +𫚑 鮅 𫚒 鮄 +𫚓 鮤 𫚔 鮰 𫚕 鰤 +𫚖 鮆 +𫚗 鮯 +𫚘 𩻮 𫚙 鯆 +𫚚 鮿 +𫚛 鮵 +𫚜 䲅 +𫚝 𩸄 +𫚞 鯬 +𫚟 𩸡 +𫚠 䱧 +𫚡 鯞 +𫚢 鰋 +𫚣 鯾 +𫚤 鰦 +𫚥 鰕 +𫚦 鰫 +𫚧 鰽 +𫚨 𩻗 +𫚩 𩻬 +𫚪 鱊 +𫚫 鱢 +𫚬 𩼶 +𫚭 鱲 +𫛚 鳽 𫛛 鳷 +𫛜 鴀 +𫛝 鴅 𫛞 鴃 +𫛟 鸗 +𫛠 𩿤 +𫛡 鴔 𫛢 鸋 +𫛣 鴥 +𫛤 鴐 +𫛥 鵊 +𫛦 鴮 +𫛧 𪀖 +𫛨 鵧 +𫛩 鴳 +𫛪 鴽 +𫛫 鶰 +𫛬 䳜 +𫛭 鵟 +𫛮 䳤 +𫛯 鶭 +𫛰 䳢 +𫛱 鵫 +𫛲 鵰 +𫛳 鵩 +𫛴 鷤 +𫛵 鶌 𫛶 鶒 +𫛷 鶦 𫛸 鶗 +𫛹 𪃧 +𫛺 䳧 +𫛻 𪃒 +𫛼 䳫 +𫛽 鷅 +𫛾 𪆷 +𫜀 鷐 +𫜁 鷩 +𫜂 𪅂 +𫜃 鷣 +𫜄 鷷 +𫜅 䴋 +𫜊 𪉸 +𫜑 麷 +𫜒 䴱 +𫜓 𪌭 +𫜔 䴽 +𫜕 𪍠 +𫜙 䵴 +𫜟 𪓰 +𫜨 䶕 +𫜩 齧 +𫜪 齩 +𫜫 𫜦 +𫜬 齰 +𫜭 齭 +𫜮 齴 +𫜯 𪙏 +𫜰 齾 +𫜲 龓 +𫜳 䶲 +𫝈 㑮 +𫝋 𠐊 +𫝦 㛝 +𫝧 㜐 +𫝨 媈 +𫝩 嬦 +𫝪 𡟫 +𫝫 婡 +𫝬 嬇 +𫝭 孆 +𫝮 孄 +𫝵 嶹 +𫞅 𦠅 +𫞗 潣 +𫞚 澬 +𫞛 㶆 +𫞝 灍 +𫞠 爧 +𫞡 爃 +𫞢 𤛱 +𫞣 㹽 +𫞥 珼 +𫞦 璾 +𫞧 𤩂 +𫞨 璼 +𫞩 璊 +𫞷 𥢶 +𫟃 絍 +𫟄 綋 +𫟅 綡 +𫟆 緟 +𫟇 𦆲 +𫟑 䖅 +𫟕 䕤 +𫟞 訨 +𫟟 詊 +𫟠 譂 +𫟡 誴 +𫟢 䜖 +𫟤 䡐 +𫟥 䡩 +𫟦 䡵 +𫟫 𨞺 +𫟬 𨟊 +𫟲 釚 +𫟳 釲 +𫟴 鈖 +𫟵 鈗 +𫟶 銏 +𫟷 鉝 +𫟸 鉽 +𫟹 鉷 +𫟺 䤤 +𫟻 銂 +𫟼 鐽 +𫟽 𨧰 +𫟾 𨩰 +𫟿 鎈 +𫠀 䥄 +𫠁 鑉 +𫠂 閝 +𫠅 韚 +𫠆 頍 +𫠇 𩖰 +𫠈 䫾 +𫠊 䮄 +𫠋 騼 +𫠌 𩦠 +𫠏 𩵦 +𫠐 魽 +𫠑 䱸 +𫠒 鱆 +𫠖 𩿅 +𫠜 齯 diff --git a/data/simp_to_trad/phrases.txt b/data/dictionary/STPhrases.txt similarity index 99% rename from data/simp_to_trad/phrases.txt rename to data/dictionary/STPhrases.txt index e60efac..5527f86 100644 --- a/data/simp_to_trad/phrases.txt +++ b/data/dictionary/STPhrases.txt @@ -80,7 +80,7 @@ 一坛坛 一罈罈 一坛死水 一壇死水 一塌糊涂 一塌糊塗 -一壸千金 一壺千金 +一壶千金 一壺千金 一夜致富 一夜致富 一大伙 一大夥 一天后 一天後 @@ -1067,7 +1067,6 @@ 不合法 不合法 不合理 不合理 不合算 不合算 -不合羣 不合羣 不合群 不合羣 不合节 不合節 不合规定 不合規定 @@ -1522,6 +1521,7 @@ 世系 世系 世纪钟 世紀鐘 世纪钟表 世紀鐘錶 +世胄 世胄 世表 世表 世阿弥 世阿彌 世面 世面 @@ -1987,7 +1987,7 @@ 丰年稔岁 豐年稔歲 丰年节 豐年節 丰年虾 豐年蝦 -丰度 丰度 +丰度 丰度 豐度 丰悴 豐悴 丰情 丰情 丰收 豐收 @@ -3447,7 +3447,7 @@ 仆使 僕使 仆倒 仆倒 仆僮 僕僮 -仆僮成羣 僕僮成羣 +仆僮成群 僕僮成羣 仆吏 僕吏 仆固怀恩 僕固懷恩 仆地 仆地 @@ -3493,6 +3493,7 @@ 介壳虫 介殼蟲 介系词 
介係詞 介绍出来 介紹出來 +介胄 介冑 介虫 介蟲 介面 介面 介面卡 介面卡 @@ -3625,7 +3626,7 @@ 仪制 儀制 仪器表 儀器表 仪征 儀徵 -仪征市 儀征市 +仪征市 儀徵市 仪态万千 儀態萬千 仪态万方 儀態萬方 仪注 儀注 @@ -4191,7 +4192,7 @@ 余震 餘震 余霞 餘霞 余音 餘音 -余音绕梁 餘音繞梁 +余音绕梁 餘音繞樑 余韵 餘韻 余项 餘項 余额 餘額 @@ -4330,7 +4331,7 @@ 依托 依託 依据 依據 依法炮制 依法炮製 -依然范特西 依然範特西 +依然范特西 依然范特西 依赞 依贊 依附于 依附於 侠气干云 俠氣干雲 @@ -4872,7 +4873,7 @@ 假叶 假葉 假意周旋 假意周旋 假托 假託 -假期忧郁症候羣 假期憂鬱症候羣 +假期忧郁症候群 假期憂鬱症候羣 假药 假藥 假面 假面 假面具 假面具 @@ -5076,7 +5077,7 @@ 先发 先發 先发制人 先發制人 先发投手 先發投手 -先发投手羣 先發投手羣 +先发投手群 先發投手羣 先后 先後 先后 先后倒置 先後倒置 先后顺序 先後順序 @@ -5444,6 +5445,7 @@ 免参 免參 免征 免徵 免疫系统 免疫系統 +免胄 免冑 免试升学 免試升學 免试升高中班 免試升高中班 兔尽狗烹 兔盡狗烹 @@ -5656,7 +5658,7 @@ 八万多 八萬多 八万大藏经 八萬大藏經 八个 八個 -八仙桌上摆夜壸 八仙桌上擺夜壺 +八仙桌上摆夜壶 八仙桌上擺夜壺 八位元个人电脑 八位元個人電腦 八余 八餘 八克 八克 @@ -5815,7 +5817,8 @@ 六道轮回 六道輪迴 六面 六面 六面体 六面體 -六须鲶 六鬚鯰 +六须鲇 六鬚鮎 +六须鲶 六鬚鮎 兰克 蘭克 兰台 蘭臺 兰台令史 蘭臺令史 @@ -6166,6 +6169,7 @@ 冠状动脉硬化症 冠狀動脈硬化症 冠盖云集 冠蓋雲集 冠盖如云 冠蓋如雲 +冠胄 冠冑 冤仇 冤仇 冥凌 冥淩 冥凌浃行 冥淩浹行 @@ -6251,7 +6255,6 @@ 冰前刮雪 冰前颳雪 冰厂 冰廠 冰壶秋月 冰壺秋月 -冰壸秋月 冰壺秋月 冰岩 冰岩 冰斗 冰斗 冰杯 冰杯 @@ -7510,9 +7513,9 @@ 出笼 出籠 出笼鸟 出籠鳥 出籍 出籍 -出类拔羣 出類拔羣 +出类拔群 出類拔羣 出类拔萃 出類拔萃 -出类超羣 出類超羣 +出类超群 出類超羣 出粗 出粗 出粜 出糶 出粮 出糧 @@ -7530,8 +7533,8 @@ 出给 出給 出继 出繼 出缺 出缺 -出羣拔萃 出羣拔萃 出群 出羣 +出群拔萃 出羣拔萃 出老千 出老千 出脱 出脫 出自 出自 @@ -7713,18 +7716,17 @@ 分布 分佈 分布于 分佈於 分布区 分佈區 -分布图 分布圖 -分布圖 分布圖 +分布图 分佈圖 分布学习 分佈學習 分布式 分佈式 -分布式发展模型 分布式發展模型 +分布式发展模型 分佈式發展模型 分布式拒绝服务 分佈式拒絕服務 分布式环境 分佈式環境 分布式结构 分佈式結構 分布式网络 分佈式網絡 分布控制 分佈控制 分布范围 分佈範圍 -分布连结网络 分布連結網絡 +分布连结网络 分佈連結網絡 分当 分當 分录 分錄 分形几何 分形幾何 @@ -8388,7 +8390,7 @@ 制作商 製作商 制作好 製作好 制作成 製作成 -制作羣 製作羣 +制作群 製作羣 制作者 製作者 制作费 製作費 制假 製假 @@ -9312,7 +9314,6 @@ 千岛列岛 千島列島 千岛湖 千島湖 千岛湖事件 千島湖事件 -千岛羣岛 千島羣島 千岛群岛 千島羣島 千岛酱 千島醬 千岩万壑 千巖萬壑 @@ -9370,7 +9371,7 @@ 千疮百孔 千瘡百孔 千百万 千百萬 千百年 千百年 -千百成羣 千百成羣 +千百成群 千百成羣 千皓宣 千皓宣 千真万真 千真萬真 千真万确 千真萬確 @@ -9475,7 +9476,7 @@ 千难万险 千難萬險 千难万难 千難萬難 千面人 千面人 -千页羣岛 千頁羣島 +千页群岛 千頁羣島 千顷陂 千頃陂 千鬼百怪 千鬼百怪 千鸟 千鳥 @@ -9630,11 +9631,12 @@ 华彩 華彩 华志 華志 华星秋月之章 華星秋月之章 -华核 華核 +华核 華覈 华氏寒暑表 華氏寒暑表 华润万家 華潤萬家 华特里德 華特里德 华纳音乐集团 華納音樂集團 +华胄 華胄 华表 華表 华表鹤归 華表鶴歸 华里 華里 @@ -9651,7 +9653,7 @@ 协调出 協調出 卑梁之衅 卑梁之釁 卓别林 卓別林 -卓尔出羣 卓爾出羣 +卓尔出群 卓爾出羣 卓柏卡布拉 卓柏卡布拉 
单一价 單一價 单一合体字 單一合體字 @@ -10451,6 +10453,7 @@ 压杆 壓桿 压板 壓板 压缩饼干 壓縮餅乾 +压胄子 壓冑子 压舌板 壓舌板 压面棍 壓麪棍 厌恶 厭惡 @@ -13055,8 +13058,8 @@ 合组 合組 合编 合編 合缝 合縫 -合羣性 合羣性 合群 合羣 +合群性 合羣性 合而为一 合而爲一 合股 合股 合肥 合肥 @@ -13771,6 +13774,7 @@ 名种 名種 名称标签 名稱標籤 名系 名系 +名胄 名胄 名胜 名勝 名胜古迹 名勝古蹟 名臣言行录 名臣言行錄 @@ -13886,7 +13890,7 @@ 后备部 後備部 后天 後天 后天免疫 後天免疫 -后天免疫缺乏症候羣 後天免疫缺乏症候羣 +后天免疫缺乏症候群 後天免疫缺乏症候羣 后天性 後天性 后夫 後夫 后头 後頭 @@ -13923,7 +13927,7 @@ 后座系 後座繫 后庭 後庭 后庭花 後庭花 -后弦 后弦 +后弦 後弦 后影 後影 后心 後心 后怕 後怕 @@ -14060,7 +14064,7 @@ 后羿 后羿 后羿射日 后羿射日 后翅 後翅 -后翻筋斗 后翻筋斗 +后翻筋斗 後翻筋斗 后者 後者 后肢 後肢 后背 後背 @@ -14383,13 +14387,13 @@ 启发法 啓發法 启示录 啓示錄 启蒙 啓蒙 -启蒙专制君主 啓矇專制君主 +启蒙专制君主 啓蒙專制君主 启蒙哲学 啓蒙哲學 启蒙时代 啓蒙時代 启蒙运动 啓蒙運動 吴下阿蒙 吳下阿蒙 吴俊杰 吳俊傑 -吴克羣 吳克羣 +吴克群 吳克羣 吴嘉种 吳嘉種 吴复连 吳復連 吴嶽修 吳嶽修 @@ -15788,7 +15792,7 @@ 团服 團服 团栾 團欒 团案 團案 -团沙羣岛 團沙羣島 +团沙群岛 團沙羣島 团牌 團牌 团状 團狀 团瓢 團瓢 @@ -15970,6 +15974,7 @@ 国立台湾图书馆 國立臺灣圖書館 国立台湾技术大学 國立臺灣技術大學 国立教育广播电台 國立教育廣播電臺 +国胄 國胄 国药 國藥 国语注音符号第一式 國語注音符號第一式 国语注音符号第二式 國語注音符號第二式 @@ -16194,6 +16199,7 @@ 地缘关系 地緣關係 地缝里 地縫裏 地老天荒不了情 地老天荒不了情 +地胄 地胄 地蜡 地蠟 地表 地表 地表水 地表水 @@ -16485,7 +16491,7 @@ 塞耳盗钟 塞耳盜鐘 塞药 塞藥 塞莉佛维克 塞莉佛維克 -塞车症候羣 塞車症候羣 +塞车症候群 塞車症候羣 塞韦里诺 塞韋裏諾 填个 填個 填了 填了 @@ -16522,6 +16528,7 @@ 士别三日 士別三日 士别三日刮目相待 士別三日刮目相待 士别多日 士別多日 +士胄 士胄 壮室之秋 壯室之秋 壮志 壯志 壮志凌云 壯志凌雲 @@ -16538,10 +16545,10 @@ 声情并茂 聲情並茂 声类系统 聲類系統 壳里 殼裏 +壶口瀑布 壺口瀑布 +壶范 壺範 壶里 壺裏 -壸口瀑布 壺口瀑布 -壸范 壺範 -壸里乾坤 壺裏乾坤 +壶里乾坤 壺裏乾坤 壹个人 壹個人 壹周刊 壹週刊 壹败涂地 壹敗塗地 @@ -16561,7 +16568,6 @@ 备抵折旧 備抵折舊 备注 備註 备注栏 備註欄 -复用 複用 复三 復三 复上 覆上 复业 復業 @@ -16805,6 +16811,7 @@ 复现 復現 复瓿 覆瓿 复生 復生 +复用 複用 复电 覆電 复盂 覆盂 复盂之固 覆盂之固 @@ -16962,7 +16969,7 @@ 外才 外才 外挂 外掛 外挂式 外掛式 -外文系 外文係 +外文系 外文系 外明不知里暗 外明不知裏暗 外来物种 外來物種 外来种 外來種 @@ -17356,7 +17363,7 @@ 大而无当 大而無當 大肆搜捕 大肆搜捕 大肠杆菌 大腸桿菌 -大肠杆菌羣 大腸桿菌羣 +大肠杆菌群 大腸桿菌羣 大胜 大勝 大胡子 大鬍子 大脑出血性中风 大腦出血性中風 @@ -17497,6 +17504,7 @@ 天津师范 天津師範 天津师范大学 天津師範大學 天渊之别 天淵之別 +天潢贵胄 天潢貴胄 天然纤维 天然纖維 天生干 天生幹 天盟誓表现 天盟誓表現 @@ -18184,8 +18192,8 @@ 娘舅 孃舅 娩出 娩出 娱乐台 娛樂臺 -娴于 嫻於 -娴于辞令 嫻於辭令 +娴于 嫺於 +娴于辞令 嫺於辭令 娶了 娶了 娶回 娶回 娶回家 娶回家 @@ -18195,7 +18203,7 @@ 婚后 婚後 婚姻制度 婚姻制度 婢仆 婢僕 -婴儿猝死症候羣 嬰兒猝死症候羣 +婴儿猝死症候群 嬰兒猝死症候羣 婴儿猝死综合症 嬰兒猝死綜合症 婶娘 嬸孃 媒人口无量斗 
媒人口無量斗 @@ -18281,7 +18289,7 @@ 存扣 存扣 存折 存摺 存款准备率 存款準備率 -存款准备金 存款准備金 +存款准备金 存款準備金 存款准备金率 存款準備金率 孙协志 孫協志 孙大千 孫大千 @@ -18533,6 +18541,7 @@ 宝山空回 寶山空回 宝庄 寶莊 宝志 寶誌 +宝胄 寶胄 宝贝团 寶貝團 宝里宝气 寶里寶氣 宝鉴 寶鑑 @@ -18619,7 +18628,7 @@ 家里 家裏 家里的 家裏的 家长制 家長制 -家长里短 家長里短 +家长里短 家長裏短 家门不幸 家門不幸 家门有幸 家門有幸 宸极 宸極 @@ -18872,9 +18881,9 @@ 将相本无种 將相本無種 将遇良才 將遇良才 将门之后 將門之後 -小丑 小丑 +小丑 小丑 小醜 小丑丫鬟 小醜丫鬟 -小丑跳梁 小丑跳樑 +小丑跳梁 小醜跳樑 小丑鱼 小丑魚 小业种 小業種 小个 小個 @@ -18894,7 +18903,7 @@ 小便斗 小便斗 小修 小修 小傢伙 小傢伙 -小儿麻痹症 小兒麻痺症 +小儿麻痹症 小兒麻痹症 小克 小克 小冬 小冬 小冲突 小衝突 @@ -20225,6 +20234,7 @@ 帝制 帝制 帝制时代 帝制時代 帝后 帝后 +帝胄 帝胄 带丑闻 帶醜聞 带个 帶個 带个好 帶個好 @@ -20634,7 +20644,6 @@ 干绷 乾繃 干绷儿 乾繃兒 干缺 幹缺 -干羣关系 幹羣關係 干群 幹羣 干群关系 幹羣關係 干耗 乾耗 @@ -21022,8 +21031,10 @@ 幸运草 幸運草 幸进 倖進 幸逢 幸逢 +幺么小丑 幺麼小醜 幺并矢 幺並矢 -幺麽小丑 幺麼小丑 +幺麼小丑 幺麼小醜 +幺麽小丑 幺麼小醜 幻出 幻出 幻念 幻念 幻想曲 幻想曲 @@ -21283,7 +21294,6 @@ 建筑科 建築科 建筑系 建築系 建筑结构 建築結構 -建筑羣 建築羣 建筑群 建築羣 建筑艺术 建築藝術 建筑节 建築節 @@ -22228,7 +22238,7 @@ 征信所 徵信所 征信社 徵信社 征候 徵候 -征候羣 徵候羣 +征候群 徵候羣 征兆 徵兆 征兵 徵兵 征兵制 徵兵制 @@ -22515,7 +22525,7 @@ 德里 德里 德里达 德里達 德高而毁来 德高而譭來 -徼幸 徼幸 +徼幸 徼倖 心不甘情不愿 心不甘情不願 心于 心於 心余 心餘 @@ -22574,7 +22584,7 @@ 心脏病 心臟病 心脏病发 心臟病發 心脏病史 心臟病史 -心脏痲痹 心臟痲痺 +心脏痲痹 心臟痲痹 心脏痲痺 心臟痲痺 心脏科 心臟科 心脏移植 心臟移植 @@ -22584,7 +22594,7 @@ 心脏衰竭 心臟衰竭 心脏计 心臟計 心脏镜 心臟鏡 -心脏麻痹 心臟麻痺 +心脏麻痹 心臟麻痹 心脏麻痺 心臟麻痺 心花怒发 心花怒發 心荡 心蕩 @@ -23090,8 +23100,8 @@ 恶名昭彰 惡名昭彰 恶名昭著 惡名昭著 恶哏哏 惡哏哏 -恶唑啉 惡唑啉 -恶唑啉酮 惡唑啉酮 +恶唑啉 噁唑啉 +恶唑啉酮 噁唑啉酮 恶因 惡因 恶地 惡地 恶声 惡聲 @@ -23276,6 +23286,7 @@ 悬河注火 懸河注火 悬灯结彩 懸燈結彩 悬肠挂肚 懸腸掛肚 +悬胄 懸冑 悬臂梁 懸臂樑 悬车致仕 懸車致仕 悬针 懸針 @@ -23625,7 +23636,6 @@ 战斗旅 戰鬥旅 战斗机 戰鬥機 战斗编组 戰鬥編組 -战斗羣 戰鬥羣 战斗群 戰鬥羣 战斗者 戰鬥者 战斗舰 戰鬥艦 @@ -25178,10 +25188,10 @@ 拔宅飞升 拔宅飛昇 拔山志 拔山志 拔山曲 拔山曲 -拔羣出类 拔羣出類 -拔羣出萃 拔羣出萃 +拔群出类 拔羣出類 +拔群出萃 拔羣出萃 拔萃出类 拔萃出類 -拔萃出羣 拔萃出羣 +拔萃出群 拔萃出羣 拔萝卜 拔蘿蔔 拔虎须 拔虎鬚 拔须 拔鬚 @@ -26076,7 +26086,7 @@ 提了 提了 提价 提價 提克瑞提 提克瑞提 -提克里特 提克裏特 +提克里特 提克里特 提出 提出 提出去 提出去 提出建议 提出建議 @@ -26630,6 +26640,7 @@ 支烟 支菸 支系 支系 支系统 支系統 +支胄 支胄 收了 收了 收出 收出 收出去 收出去 @@ -26816,6 +26827,7 @@ 教育团体 教育團體 教育方针 教育方針 教育系 教育系 +教胄 教胄 教范 教範 敝帚千金 敝帚千金 敝舍 敝舍 @@ -27270,7 +27282,7 @@ 断面图 斷面圖 斯伯丁杯 斯伯丁盃 斯克 斯克 -斯克里亚宾 斯克裏亞賓 +斯克里亚宾 斯克里亞賓 斯坦贝克 斯坦貝克 斯干 斯干 斯当东 斯當東 
@@ -27281,10 +27293,10 @@ 斯洛伐克 斯洛伐克 斯洛伐克共和国 斯洛伐克共和國 斯洛伐克语 斯洛伐克語 -斯瓦希里 斯瓦希裏 -斯瓦希里语 斯瓦希裏語 +斯瓦希里 斯瓦希里 +斯瓦希里语 斯瓦希里語 斯瓦特谷地 斯瓦特谷地 -斯科普里 斯科普裏 +斯科普里 斯科普里 斯芬克士 斯芬克士 斯芬克斯 斯芬克斯 斯诺克 斯諾克 @@ -27292,7 +27304,7 @@ 斯里 斯里 斯里兰卡 斯里蘭卡 斯里兰卡民主社会主义共和国 斯里蘭卡民主社會主義共和國 -斯里兰卡电信 斯裏蘭卡電信 +斯里兰卡电信 斯里蘭卡電信 斯里巴加湾港 斯里巴加灣港 斯里査潘 斯里查潘 斯雷布雷尼察 斯雷布雷尼察 @@ -27329,7 +27341,7 @@ 新台 新臺 新台币 新臺幣 新叶 新葉 -新喀里多尼亚 新喀裏多尼亞 +新喀里多尼亚 新喀里多尼亞 新城电台 新城電臺 新娘 新娘 新娘子 新娘子 @@ -28061,6 +28073,7 @@ 景从云合 景從雲合 景从云集 景從雲集 景星庆云 景星慶雲 +景胄 景胄 景致 景緻 景谷 景谷 景谷傣族彝族自治县 景谷傣族彝族自治縣 @@ -28448,7 +28461,7 @@ 曲院 麴院 曲隐 曲隱 曲霉 麴黴 -曲霉毒素 曲黴毒素 +曲霉毒素 麴黴毒素 曲靖 曲靖 曲靖地区 曲靖地區 曲靖市 曲靖市 @@ -28728,6 +28741,7 @@ 望眼欲穿 望眼欲穿 望秋先零 望秋先零 望穿秋水 望穿秋水 +望胄 望胄 朝东面 朝東面 朝乾夕惕 朝乾夕惕 朝云 朝雲 @@ -28826,6 +28840,7 @@ 末大必折 末大必折 末娘 末娘 末末了 末末了 +末胄 末胄 末药 末藥 本价 本價 本位制 本位制 @@ -29203,7 +29218,7 @@ 杆儿 杆兒 桿兒 杆刀 桿刀 杆塔 杆塔 -杆子 杆子 +杆子 杆子 桿子 杆状 桿狀 杆直 桿直 杆秤 桿秤 @@ -29273,6 +29288,7 @@ 村子里 村子裏 村干事 村幹事 村庄 村莊 +村胄 村胄 村舍 村舍 村里 村裏 村里长 村裏長 @@ -29922,7 +29938,7 @@ 林秀合 林秀合 林秋 林秋 林秋桂 林秋桂 -林羣志 林羣志 +林群志 林羣志 林致光 林致光 林芳郁 林芳郁 林英杰 林英傑 @@ -29951,6 +29967,7 @@ 枝干 枝幹 枝干断折 枝幹斷折 枝繁叶茂 枝繁葉茂 +枝胄 枝胄 枝针 枝針 枣庄 棗莊 枣核 棗核 @@ -30301,7 +30318,7 @@ 核配 核配 核酪 核酪 核酶 核酶 -核酸 覈酸 +核酸 核酸 核销 覈銷 核防御 核防禦 核验 覈驗 @@ -31245,6 +31262,7 @@ 毫针 毫針 毯里 毯裏 氏症 氏症 +氏胄 氏胄 民丰 民豐 民丰县 民豐縣 民主党 民主黨 @@ -32322,6 +32340,7 @@ 洪杰鸿 洪傑鴻 洪泛 洪泛 洪炉燎发 洪爐燎髮 +洪胄 洪胄 洪若朴 洪若樸 洪范 洪範 洪适 洪适 @@ -32897,6 +32916,7 @@ 清汤挂面 清湯掛麪 清浊同流 清濁同流 清算斗争 清算鬥爭 +清胄 清胄 清芬志 清芬志 清贫寡欲 清貧寡欲 清酒红人面财帛动人心 清酒紅人面財帛動人心 @@ -32922,6 +32942,8 @@ 渡了 渡了 渡假胜地 渡假勝地 渡头云 渡頭雲 +渡海小轮 渡海小輪 +渡轮 渡輪 渥兹尼克 渥茲尼克 温克 溫克 温卷 溫卷 @@ -33336,7 +33358,7 @@ 滤出来 濾出來 滥发 濫發 滨松市 濱松市 -滩涂 灘涂 +滩涂 灘塗 滴了 滴了 滴了天 滴了天 滴修都速 滴修都速 @@ -33496,7 +33518,7 @@ 瀍河回族区 瀍河回族區 瀑布 瀑布 瀑布区 瀑布區 -瀑布羣 瀑布羣 +瀑布群 瀑布羣 瀛台 瀛臺 瀛表 瀛表 灌个 灌個 @@ -34422,10 +34444,9 @@ 狎游 狎遊 狐借虎威 狐藉虎威 狐朋狗党 狐朋狗黨 -狐羣狗党 狐羣狗黨 狐群狗党 狐羣狗黨 狐裘蒙戎 狐裘蒙戎 -狗党狐羣 狗黨狐羣 +狗党狐群 狗黨狐羣 狗占马坑 狗占馬坑 狗口里吐不出象牙 狗口裏吐不出象牙 狗嘴里 狗嘴裏 @@ -34540,6 +34561,7 @@ 玄机暗藏 玄機暗藏 玄武岩 玄武岩 玄武质熔岩 玄武質熔岩 +玄胄 玄胄 玄针 玄鍼 玄黄翻复 玄黃翻覆 率先垂范 率先垂範 @@ -34600,7 +34622,7 @@ 王彩碧 王彩碧 王志华 王志華 王志文 王志文 -王志羣 王志羣 +王志群 王志羣 王志贞 王志貞 王杰 王傑 王杰胜 王傑勝 @@ -34967,6 +34989,8 @@ 甲种国民兵役 甲種國民兵役 
甲种维生素 甲種維生素 甲第连云 甲第連雲 +甲胄 甲冑 +甲胄鱼类 甲冑魚類 甲虫 甲蟲 甲虫类 甲蟲類 甲虫车 甲蟲車 @@ -35207,7 +35231,6 @@ 病虫害 病蟲害 症侯群 症侯羣 症候 症候 -症候羣 症候羣 症候群 症候羣 症状 症狀 症状性 症狀性 @@ -35233,7 +35256,7 @@ 痨虫 癆蟲 痫症 癇症 痰症 痰症 -痲痹不了 痲痺不了 +痲痹不了 痲痹不了 痲痺不了 痲痺不了 痴呆症 癡呆症 痴念 癡念 @@ -35359,7 +35382,7 @@ 白血球过多症 白血球過多症 白里安 白里安 白里透红 白裏透紅 -白雪公主症候羣 白雪公主症候羣 +白雪公主症候群 白雪公主症候羣 白雪曲 白雪曲 白霉 白黴 白面 白麪 @@ -35523,6 +35546,7 @@ 皇极历 皇極曆 皇极历史 皇極歷史 皇极数 皇極數 +皇胄 皇胄 皇辟 皇辟 皓发 皓髮 皓月千里 皓月千里 @@ -36367,6 +36391,7 @@ 神经系统 神經系統 神经纤维 神經纖維 神经纤维瘤 神經纖維瘤 +神胄 神胄 神荼郁垒 神荼鬱壘 神迹 神蹟 神采 神采 @@ -36456,7 +36481,6 @@ 禄丰 祿豐 禄丰县 祿豐縣 福克 福克 -福克兰羣岛 福克蘭羣島 福克兰群岛 福克蘭羣島 福克斯 福克斯 福克纳 福克納 @@ -37516,6 +37540,7 @@ 簇合 簇合 簌簌发抖 簌簌發抖 簪笔磬折 簪筆磬折 +簪缨世胄 簪纓世胄 簳面杖 簳麪杖 簸荡 簸盪 簿历 簿歷 @@ -37672,6 +37697,7 @@ 糟透了 糟透了 糟齿类爬虫 糟齒類爬蟲 糠穗 糠穗 +糯米团 糯米糰 系一片 係一片 系一番 係一番 系一种 係一種 @@ -37753,6 +37779,7 @@ 系缆 繫纜 系缚 繫縛 系而不食 繫而不食 +系胄 系胄 系腰 繫腰 系臂 係臂 系臂之宠 繫臂之寵 @@ -37840,7 +37867,7 @@ 累块积苏 累塊積蘇 累堆 累堆 累瓦结绳 累瓦結繩 -累积性伤害症候羣 累積性傷害症候羣 +累积性伤害症候群 累積性傷害症候羣 累积折耗 累積折耗 累绁 累紲 累臣 累臣 @@ -37858,6 +37885,7 @@ 繃价 繃價 繃针 繃針 纂修 纂修 +纂胄 纂胄 纠合 糾合 纡余 紆餘 纡回 紆迴 @@ -38214,9 +38242,9 @@ 结伴同行 結伴同行 结余 結餘 结党 結黨 -结党聚羣 結黨聚羣 +结党聚群 結黨聚羣 结党营私 結黨營私 -结党连羣 結黨連羣 +结党连群 結黨連羣 结出 結出 结制 結制 结发 結髮 @@ -38346,6 +38374,7 @@ 继志述事 繼志述事 继续干 繼續幹 绪余 緒餘 +绪胄 緒胄 续借 續借 续借手续 續借手續 续发性 續發性 @@ -38647,7 +38676,7 @@ 羊拐 羊拐 羊毛出在羊身上 羊毛出在羊身上 羊瘙痒症 羊瘙癢症 -羊羣里跑出骆驼来 羊羣裏跑出駱駝來 +羊群里跑出骆驼来 羊羣裏跑出駱駝來 羊肉落在狗嘴里 羊肉落在狗嘴裏 羊膜穿刺术 羊膜穿刺術 羊舍 羊舍 @@ -38724,14 +38753,14 @@ 羞面见人 羞面見人 羡余 羨餘 羡叹 羨歎 -羣众关系 羣衆關係 -羣而不党 羣而不黨 -羣谋咸同 羣謀咸同 -羣轻折轴 羣輕折軸 群丑 羣醜 +群众关系 羣衆關係 群众团体 羣衆團體 群后 羣后 群系 羣系 +群而不党 羣而不黨 +群谋咸同 羣謀咸同 +群轻折轴 羣輕折軸 群辟 羣辟 群雕 羣雕 羹里来饭里去 羹裏來飯裏去 @@ -39032,7 +39061,7 @@ 联系实际 聯繫實際 联系方式 聯繫方式 联系汇率 聯繫匯率 -联系羣众 聯繫羣衆 +联系群众 聯繫羣衆 联赛杯 聯賽盃 联邦制 聯邦制 聘任制 聘任制 @@ -39170,6 +39199,18 @@ 胃药 胃藥 胃药片 胃藥片 胃里 胃裏 +胄嗣 胄嗣 +胄子 胄子 +胄序 胄序 +胄族 胄族 +胄甲 冑甲 +胄监 胄監 +胄科 冑科 +胄绪 胄緒 +胄胤 胄胤 +胄裔 胄裔 +胄裔繁衍 胄裔繁衍 +胄阀 胄閥 胆大于天 膽大於天 胆大如斗 膽大如斗 胆石症 膽石症 @@ -39524,6 +39565,7 @@ 胡麻 胡麻 胡麻油 胡麻油 胡麻籽 胡麻籽 +胤胄 胤胄 胰淀粉酶 胰澱粉酶 胰脏 胰臟 胰脏炎 胰臟炎 @@ -39694,7 +39736,7 @@ 腐肠之药 腐腸之藥 腑脏 腑臟 腕表 腕錶 -腕道症候羣 腕道症候羣 +腕道症候群 腕道症候羣 腕隧道症 腕隧道症 腕隧道症候群 腕隧道症候羣 腥黑穗病 腥黑穗病 @@ -40158,7 +40200,7 @@ 艾希克罗 艾希克羅 艾弥尔 
艾彌爾 艾德蒙斯 艾德蒙斯 -艾德蒙顿 艾德矇頓 +艾德蒙顿 艾德蒙頓 艾斯托利尔 艾斯托利爾 艾瑞克 艾瑞克 艾瑞克森 艾瑞克森 @@ -40503,6 +40545,7 @@ 苗栗县 苗栗縣 苗栗市 苗栗市 苗种 苗種 +苗胄 苗胄 苛政猛于虎 苛政猛於虎 苜蓿长栏干 苜蓿長欄干 苞叶 苞葉 @@ -40559,6 +40602,7 @@ 英气风发 英氣風發 英烈千秋 英烈千秋 英联合王国 英聯合王國 +英胄 英胄 英语系 英語系 英里 英里 英雄交响曲 英雄交響曲 @@ -40602,8 +40646,8 @@ 范伦铁诺 范倫鐵諾 范佩西 范佩西 范例 範例 -范光羣 范光羣 -范公偁 範公偁 +范光群 范光羣 +范公偁 范公偁 范公堤 范公堤 范冰冰 范冰冰 范可钦 范可欽 @@ -40652,7 +40696,7 @@ 范文藤 范文藤 范文虎 范文虎 范文选读 範文選讀 -范斯坦 範斯坦 +范斯坦 范斯坦 范晓萱 范曉萱 范晔 范曄 范本 範本 @@ -40663,6 +40707,7 @@ 范洪森 范洪森 范湘暄 范湘暄 范特尔 范特爾 +范特西 范特西 范玮琪 范瑋琪 范琪斐 范琪斐 范甘迪 范甘迪 @@ -40968,7 +41013,7 @@ 荷雷克 荷雷克 莎玛海耶克 莎瑪海耶克 莒光周 莒光週 -莜面 莜面 +莜面 莜麪 莫三比克 莫三比克 莫三比克人民共和国 莫三比克人民共和國 莫乃耳合金 莫乃耳合金 @@ -40980,7 +41025,7 @@ 莫干山 莫干山 莫当 莫當 莫扎特 莫扎特 -莫扎里拉 莫扎裏拉 +莫扎里拉 莫扎里拉 莫折大提 莫折大提 莫报万一 莫報萬一 莫曼斯克 莫曼斯克 @@ -41281,7 +41326,7 @@ 蒙山 蒙山 蒙山县 蒙山縣 蒙巴萨 蒙巴薩 -蒙巴顿 矇巴頓 +蒙巴顿 蒙巴頓 蒙师 蒙師 蒙帕纳斯 蒙帕納斯 蒙席 蒙席 @@ -41301,22 +41346,22 @@ 蒙昧 矇昧 蒙昧不清 濛昧不清 蒙昧无知 矇昧無知 -蒙松雨 矇松雨 +蒙松雨 濛鬆雨 蒙求 蒙求 蒙汗药 蒙汗藥 蒙汜 濛汜 蒙混 矇混 蒙混过关 矇混過關 蒙爱 蒙愛 -蒙牛 矇牛 +蒙牛 蒙牛 蒙特 蒙特 蒙特內哥罗 蒙特內哥羅 蒙特利 蒙特利 蒙特利尔 蒙特利爾 蒙特卡洛 蒙特卡洛 -蒙特卡洛法 矇特卡洛法 +蒙特卡洛法 蒙特卡洛法 蒙特卡罗 蒙特卡羅 -蒙特卡罗方法 矇特卡羅方法 +蒙特卡罗方法 蒙特卡羅方法 蒙特塞拉特 蒙特塞拉特 蒙特娄 蒙特婁 蒙特维多 蒙特維多 @@ -41502,7 +41547,7 @@ 虎斗龙争 虎鬥龍爭 虎甲虫 虎甲蟲 虎皮松 虎皮松 -虎荡羊羣 虎蕩羊羣 +虎荡羊群 虎蕩羊羣 虎须 虎鬚 虏获 虜獲 虑周行果 慮周行果 @@ -41757,6 +41802,7 @@ 血线虫 血線蟲 血缘关系 血緣關係 血肉淋漓 血肉淋漓 +血胄 血胄 血胡同 血衚衕 血脂升高症 血脂升高症 血色素沉积症 血色素沉積症 @@ -42106,7 +42152,9 @@ 被发入山 被髮入山 被发左衽 被髮左衽 被发文身 被髮文身 +被发现 被發現 被发缨冠 被髮纓冠 +被发觉 被發覺 被发阳狂 被髮陽狂 被复 被複 被头散发 被頭散髮 @@ -42151,6 +42199,7 @@ 装配工厂 裝配工廠 装门面 裝門面 裒克 裒剋 +裔胄 裔胄 裕丰 裕豐 裘弊金尽 裘弊金盡 裘馨氏肌肉萎缩症 裘馨氏肌肉萎縮症 @@ -42603,6 +42652,7 @@ 训兽术 訓獸術 训练出 訓練出 训练出来 訓練出來 +训胄 訓胄 训蒙 訓蒙 议事录 議事錄 议事纪录 議事紀錄 @@ -42795,6 +42845,7 @@ 试炼 試煉 试种 試種 试算表 試算表 +试胄 試胄 试药 試藥 试表 試表 试验台 試驗檯 @@ -43183,6 +43234,7 @@ 豪气万丈 豪氣萬丈 豪气万千 豪氣萬千 豪气干云 豪氣干雲 +豪门贵胄 豪門貴胄 豫游 豫遊 豺狼当涂 豺狼當塗 豺狼当路 豺狼當路 @@ -43223,6 +43275,7 @@ 贝尔杰 貝爾傑 贝当 貝當 贝理克 貝理克 +贝胄 貝冑 贝西克 貝西克 贝那芬托 貝那芬托 贝里 貝里 @@ -43272,6 +43325,7 @@ 贤后 賢后 贤奸倒置 賢奸倒置 贤才 賢才 +贤胄 賢胄 败于 敗於 败于垂成 敗於垂成 败兵折将 敗兵折將 @@ -43347,6 +43401,7 @@ 贵戚 貴戚 贵极人臣 貴極人臣 贵游子弟 貴遊子弟 +贵胄 貴胄 贵贱之别 貴賤之別 贷个 貸個 贷了 貸了 @@ -43676,7 +43731,7 @@ 超级台风 超級颱風 超级杯 超級盃 超级链接 超級鏈接 -超羣出众 超羣出衆 
+超群出众 超羣出衆 超范围 超範圍 超计划利润 超計劃利潤 超赞 超讚 @@ -43803,7 +43858,7 @@ 跳墙出去 跳牆出去 跳板 跳板 跳梁 跳梁 -跳梁小丑 跳樑小丑 +跳梁小丑 跳樑小醜 跳梁猖獗之小丑 跳樑猖獗之小醜 跳楼自尽 跳樓自盡 跳水台 跳水臺 @@ -43888,6 +43943,7 @@ 身正为范 身正爲範 身系囹圄 身繫囹圄 躬先表率 躬先表率 +躬擐甲胄 躬擐甲冑 躯干 軀幹 躯干骨 軀幹骨 躲一棒槌挨一榔头 躲一棒槌挨一榔頭 @@ -44346,6 +44402,7 @@ 这出电影 這齣電影 这只 這隻 这只不 這只不 +这只是 這只是 这回 這回 这回事 這回事 这当儿 這當兒 @@ -44416,6 +44473,7 @@ 远程登录 遠程登錄 远端监控系统 遠端監控系統 远端签入 遠端簽入 +远胄 遠胄 远胜 遠勝 远距图书服务系统 遠距圖書服務系統 远远落后 遠遠落後 @@ -44791,7 +44849,7 @@ 逸才 逸才 逸欲 逸欲 逸游自恣 逸游自恣 -逸羣之才 逸羣之才 +逸群之才 逸羣之才 逸致 逸緻 逻辑链路控制 邏輯鏈路控制 逼上梁山 逼上梁山 @@ -44821,6 +44879,7 @@ 遏恶扬善 遏惡揚善 遐布 遐布 遐志 遐志 +遐胄 遐胄 道不了 道不了 道不出 道不出 道不同不相为谋 道不同不相爲謀 @@ -44854,6 +44913,7 @@ 遗恨千古 遺恨千古 遗愿 遺願 遗才 遺才 +遗胄 遺胄 遗臭万代 遺臭萬代 遗臭万年 遺臭萬年 遗臭万载 遺臭萬載 @@ -44871,6 +44931,8 @@ 遥地里 遙地裏 遥念 遙念 遥测技术 遙測技術 +遥胄 遙胄 +遥遥华胄 遙遙華胄 遨游 遨遊 遨游四海 遨遊四海 遨游天下 遨遊天下 @@ -45607,6 +45669,7 @@ 金穗奖 金穗獎 金粉楼台 金粉樓臺 金线虫 金線蟲 +金胄 金胄 金花虫 金花蟲 金范 金範 金虫 金蟲 @@ -45841,9 +45904,9 @@ 钟盘 鐘盤 钟相 鐘相 钟磬 鐘磬 -钟祥 鐘祥 -钟祥县 鐘祥縣 -钟祥市 鐘祥市 +钟祥 鍾祥 +钟祥县 鍾祥縣 +钟祥市 鍾祥市 钟福松 鐘福松 钟纽 鐘紐 钟罩 鐘罩 @@ -45984,6 +46047,7 @@ 铝制品 鋁製品 铝合金 鋁合金 铝板 鋁板 +铠胄 鎧冑 铯钟 銫鐘 铰链 鉸鏈 铰链叶 鉸鏈葉 @@ -46253,6 +46317,7 @@ 门板 門板 门殚户尽 門殫戶盡 门皁 門皁 +门胄 門胄 门里 門裏 门里人 門裏人 门里出身 門裏出身 @@ -46523,7 +46588,7 @@ 阿兹海默症病患 阿茲海默症病患 阿列克西斯 阿列克西斯 阿利托 阿利托 -阿加莎克里斯蒂 阿加莎克裏斯蒂 +阿加莎克里斯蒂 阿加莎克里斯蒂 阿卜杜拉 阿卜杜拉 阿卡提里 阿卡提里 阿历山大 阿歷山大 @@ -47563,6 +47628,7 @@ 韦庄 韋莊 韦陟朵云 韋陟朵雲 韧皮纤维 韌皮纖維 +韩侂胄 韓侂冑 韩信登坛 韓信登壇 韩制 韓製 韩升洙 韓昇洙 @@ -47944,6 +48010,7 @@ 食货志 食貨志 食野之苹 食野之苹 食面 食麪 +飮胄 飮冑 餍于游乐 饜於游樂 餐台 餐檯 餐松啖柏 餐松啖柏 @@ -48330,6 +48397,7 @@ 高纤维 高纖維 高级管理人才 高級管理人才 高耸入云 高聳入雲 +高胄 高胄 高能烈性炸药 高能烈性炸藥 高脂血症 高脂血症 高脚杯 高腳杯 @@ -48413,6 +48481,7 @@ 鱼种 魚種 鱼篮宝卷 魚籃寶卷 鱼肉乡里 魚肉鄉里 +鱼胄 魚冑 鱼虫 魚蟲 鱼贯而出 魚貫而出 鱼鳞松 魚鱗松 @@ -48425,8 +48494,10 @@ 鲇鱼 鮎魚 鲋鱼困涸辙难待西江水 鮒魚困涸轍難待西江水 鲍德里亚 鮑德里亞 +鲜于 鮮于 鲜彩 鮮彩 鲜明个性 鮮明個性 +鲜胄 鮮胄 鲜血淋漓 鮮血淋漓 鲜谷王 鮮穀王 鲸蜡 鯨蠟 @@ -48483,6 +48554,7 @@ 鸿案相庄 鴻案相莊 鸿篇巨制 鴻篇鉅製 鸿篇巨著 鴻篇鉅著 +鸿胄 鴻胄 鸿范 鴻範 鸿蒙 鴻蒙 鸿运当头 鴻運當頭 @@ -48574,7 +48646,7 @@ 麻栗坡 麻栗坡 麻栗坡县 麻栗坡縣 麻油厂 麻油廠 -麻痹不了 麻痺不了 +麻痹不了 麻痹不了 麻痺不了 麻痺不了 麻胡 麻胡 麻药 麻藥 @@ -48626,7 +48698,7 @@ 黄明志 黃明志 黄曲毒素 黃麴毒素 黄曲霉 黃麴黴 -黄曲霉毒素 黃曲黴毒素 +黄曲霉毒素 黃麴黴毒素 黄曲霉菌 黃麴黴菌 黄有才 黃有才 黄梁 黃梁 @@ -48635,6 +48707,7 @@ 黄毛团儿 
黃毛團兒 黄沙盖面 黃沙蓋面 黄河大合唱 黃河大合唱 +黄炎贵胄 黃炎貴胄 黄珮筑 黃珮筑 黄白之术 黃白之術 黄石大峡谷 黃石大峽谷 @@ -48814,6 +48887,7 @@ 齿发 齒髮 齿录 齒錄 齿条千斤顶 齒條千斤頂 +齿胄 齒胄 齿落发白 齒落髮白 龄虫 齡蟲 龙争虎斗 龍爭虎鬥 @@ -48836,6 +48910,7 @@ 龙烟铁矿 龍煙鐵礦 龙眼干 龍眼乾 龙种 龍種 +龙胄 龍胄 龙胜县 龍勝縣 龙虎并伏 龍虎並伏 龙虎斗 龍虎鬥 diff --git a/data/trad_to_simp/characters.txt b/data/dictionary/TSCharacters.txt similarity index 82% rename from data/trad_to_simp/characters.txt rename to data/dictionary/TSCharacters.txt index 9df1069..3efdaab 100644 --- a/data/trad_to_simp/characters.txt +++ b/data/dictionary/TSCharacters.txt @@ -1,34 +1,64 @@ +㑮 𫝈 㑯 㑔 㑳 㑇 +㑶 㐹 㒓 𠉂 㓨 刾 +㔋 𪟎 +㖮 𪠵 㗲 𠵾 +㘓 𪢌 㘚 㘎 +㛝 𫝦 㜄 㚯 㜏 㛣 +㜐 𫝧 㜢 𡞱 㜷 𡝠 㞞 𪨊 +㟺 𪩇 㠏 㟆 +㢗 𪪑 㢝 𢋈 㥮 㤘 㦎 𢛯 +㦞 𪫷 +㨻 𪮃 +㩋 𪮋 㩜 㨫 㩳 㧐 +㪎 𪯋 +㵾 𪷍 +㶆 𫞛 㷿 𤈷 +㹽 𫞣 㺏 𤠋 +㺜 𪺻 +㻶 𪼋 +㿖 𪽮 㿧 𤽯 䀹 𥅴 䁪 𥇢 䁻 䀥 +䅐 𫀨 +䅳 𫀬 +䆉 𫁂 +䉑 𫁲 䉙 𥬀 䉬 𫂈 䉲 𥮜 +䉶 𫁷 䊭 𥺅 䊷 䌶 +䊺 𫄚 +䋃 𫄜 +䋔 𫄞 䋙 䌺 䋚 䌻 +䋦 𫄩 +䋹 䌿 䋻 䌾 +䋼 𫄮 䋿 𦈓 䌈 𦈖 䌋 𦈘 @@ -37,51 +67,109 @@ 䌟 𦈞 䌥 𦈠 䌰 𦈙 +䍤 𫅅 +䎙 𫅭 +䎱 䎬 +䕤 𫟕 䕳 𦰴 +䖅 𫟑 +䗅 𫊪 䗿 𧉞 +䙔 𫋲 䙡 䙌 +䚩 𫌯 +䛄 𫍠 +䛳 𫍫 䜀 䜧 +䜖 𫟢 +䝭 𫎧 䝻 𧹕 䝼 䞍 䞈 𧹑 +䞋 𫎪 +䞓 𫎭 +䟃 𫎺 +䟆 𫎳 +䟐 𫎱 +䠆 𫏃 +䡐 𫟤 +䡩 𫟥 +䡵 𫟦 䢨 𨑹 +䤤 𫟺 +䥄 𫠀 䥇 䦂 +䥗 𫔋 䥩 𨱖 +䥯 𫔆 䥱 䥾 䦘 𨸄 䦛 䦶 䦟 䦷 +䦯 𫔵 䦳 𨷿 䧢 𨸟 +䪊 𫖅 䪏 𩏼 䪗 𩐀 䪘 𩏿 +䪴 𫖫 +䪾 𫖬 +䫀 𫖱 +䫂 𫖰 +䫟 𫖲 䫴 𩖗 +䫶 𫖺 +䫻 𫗇 +䫾 𫠈 +䬓 𫗊 䬘 𩙮 䬝 𩙯 䬞 𩙧 +䬧 𫗟 䭀 𩠇 䭃 𩠈 +䭑 𫗱 +䭔 𫗰 䭿 𩧭 +䮄 𫠊 䮝 𩧰 䮞 𩨁 䮠 𩧿 䮫 𩨇 +䮰 𫘮 䮳 𩨏 䮾 𩧪 䯀 䯅 䰾 鲃 +䱀 𫚐 +䱁 𫚏 䱙 𩾈 +䱧 𫚠 䱬 𩾊 䱰 𩾋 䱷 䲣 +䱸 𫠑 䱽 䲝 䲁 鳚 +䲅 𫚜 䲖 𩾂 +䲘 鳤 䲰 𪉂 +䳜 𫛬 +䳢 𫛰 +䳤 𫛮 +䳧 𫛺 +䳫 𫛼 䴉 鹮 +䴋 𫜅 䴬 𪎈 +䴱 𫜒 䴴 𪎋 +䴽 𫜔 +䵴 𫜙 +䶕 𫜨 +䶲 𫜳 万 万 丑 丑 丟 丢 @@ -134,6 +222,7 @@ 側 侧 偵 侦 偽 伪 +傌 㐷 傑 杰 傖 伧 傘 伞 @@ -166,7 +255,7 @@ 儐 傧 儔 俦 儕 侪 -儘 尽 +儘 尽 侭 償 偿 儣 𠆲 優 优 @@ -185,6 +274,7 @@ 內 内 兩 两 冊 册 +冑 胄 冪 幂 冬 冬 准 准 @@ -214,7 +304,7 @@ 創 创 剷 铲 剾 𠛅 -劃 划 +劃 划 㓰 劇 剧 劉 刘 劊 刽 @@ -230,6 +320,7 @@ 勝 胜 勞 劳 勢 势 +勣 𪟝 勩 勚 勱 劢 勳 勋 @@ -282,6 +373,7 @@ 員 员 哯 𠯟 唄 呗 +唓 𪠳 唚 吣 唸 念 問 问 @@ -314,17 +406,21 @@ 嘗 尝 嘜 唛 嘩 哗 +嘪 𪡃 嘮 唠 嘯 啸 嘰 叽 +嘳 𪡞 嘵 哓 嘸 呒 +嘺 𪡀 嘽 啴 噁 恶 噅 𠯠 噓 嘘 噚 㖊 噝 咝 +噞 𪡋 噠 哒 噥 哝 噦 哕 @@ -333,13 +429,14 @@ 噲 哙 噴 喷 噸 吨 -噹 当 +噹 当 𪠽 嚀 咛 嚇 吓 嚌 哜 嚐 尝 嚕 噜 嚙 啮 +嚛 𪠸 嚥 咽 嚦 呖 嚨 咙 @@ -348,14 +445,17 @@ 嚳 喾 嚴 严 嚶 嘤 +嚽 𪢕 囀 啭 囁 嗫 囂 嚣 +囃 𠱞 囅 冁 囈 呓 囉 啰 囌 苏 囑 嘱 +囒 𪢠 回 回 囪 囱 困 困 @@ -370,11 +470,13 @@ 坐 坐 垵 埯 埡 垭 +埬 𪣆 埰 采 執 执 堅 坚 堊 垩 
堖 垴 +堚 𪣒 堝 埚 堯 尧 報 报 @@ -389,6 +491,7 @@ 塤 埙 塵 尘 塹 堑 +塿 𪣻 墊 垫 墜 坠 墮 堕 @@ -400,6 +503,7 @@ 壇 坛 壈 𡒄 壋 垱 +壎 埙 壓 压 壘 垒 壙 圹 @@ -409,6 +513,7 @@ 壟 垄 壠 垅 壢 坜 +壣 𪤚 壩 坝 壯 壮 壺 壶 @@ -434,8 +539,10 @@ 娘 娘 娛 娱 婁 娄 +婡 𫝫 婦 妇 婭 娅 +媈 𫝨 媧 娲 媯 妫 媰 㛀 @@ -444,28 +551,38 @@ 嫋 袅 嫗 妪 嫵 妩 +嫺 娴 嫻 娴 嫿 婳 嬀 妫 嬃 媭 +嬇 𫝬 嬈 娆 嬋 婵 嬌 娇 嬙 嫱 嬡 嫒 -嬤 嬤 +嬣 𪥰 +嬤 嬷 +嬦 𫝩 嬪 嫔 嬰 婴 嬸 婶 +嬻 𪥿 孃 娘 +孄 𫝮 +孆 𫝭 +孇 𪥫 孋 㛤 孌 娈 孫 孙 學 学 +孾 𪧀 孿 孪 宮 宫 家 家 寀 采 +寠 𪧘 寢 寝 實 实 寧 宁 @@ -500,7 +617,7 @@ 崍 崃 崑 昆 崗 岗 -崙 仑 +崙 仑 𪨧 崢 峥 崬 岽 嵐 岚 @@ -514,16 +631,20 @@ 嶠 峤 嶢 峣 嶧 峄 +嶨 峃 嶮 崄 嶴 岙 嶸 嵘 +嶹 𫝵 嶺 岭 嶼 屿 嶽 岳 +巊 𪩎 巋 岿 巒 峦 巔 巅 巖 岩 +巗 𪨷 巨 巨 巰 巯 巹 卺 @@ -540,6 +661,7 @@ 幘 帻 幟 帜 幣 币 +幩 𪩸 幫 帮 幬 帱 干 干 @@ -555,6 +677,7 @@ 廂 厢 廄 厩 廈 厦 +廎 庼 廕 荫 廚 厨 廝 厮 @@ -563,6 +686,7 @@ 廡 庑 廢 废 廣 广 +廧 𪪞 廩 廪 廬 庐 廳 厅 @@ -595,6 +719,7 @@ 復 复 徵 征 徵 徹 彻 +徿 𪫌 志 志 念 念 恆 恒 @@ -642,6 +767,7 @@ 憮 怃 憲 宪 憶 忆 +憸 𪫺 懀 𢙓 懇 恳 應 应 @@ -692,6 +818,7 @@ 掆 㧏 掗 挜 掙 挣 +掚 𪭵 掛 挂 採 采 揀 拣 @@ -705,6 +832,7 @@ 搵 揾 搶 抢 摋 𢫬 +摐 𪭢 摑 掴 摜 掼 摟 搂 @@ -714,6 +842,7 @@ 摺 折 摻 掺 撈 捞 +撊 𪭾 撏 挦 撐 撑 撓 挠 @@ -721,6 +850,7 @@ 撟 挢 撣 掸 撥 拨 +撧 𪮖 撫 抚 撲 扑 撳 揿 @@ -735,9 +865,10 @@ 擓 㧟 擔 担 據 据 +擟 𪭧 擠 挤 擡 抬 -擣 捣 +擣 捣 𢭏 擬 拟 擯 摈 擰 拧 @@ -752,6 +883,7 @@ 擾 扰 攄 摅 攆 撵 +攋 𪮶 攏 拢 攔 拦 攖 撄 @@ -765,6 +897,7 @@ 攪 搅 攬 揽 敎 教 +敓 敚 敗 败 敘 叙 敵 敌 @@ -795,6 +928,7 @@ 曆 历 曇 昙 曉 晓 +曊 𪰶 曏 向 曖 暧 曠 旷 @@ -828,6 +962,7 @@ 桿 杆 梁 梁 梔 栀 +梖 𪱷 梘 枧 條 条 梟 枭 @@ -861,6 +996,7 @@ 槤 梿 槧 椠 槨 椁 +槮 椮 槳 桨 槶 椢 槼 椝 @@ -895,15 +1031,19 @@ 檮 梼 檯 台 檳 槟 +檵 𪲛 檸 柠 檻 槛 +檾 𦼖 櫃 柜 +櫅 𪲎 櫓 橹 櫚 榈 櫛 栉 櫝 椟 櫞 橼 櫟 栎 +櫠 𪲮 櫥 橱 櫧 槠 櫨 栌 @@ -913,12 +1053,16 @@ 櫱 蘖 櫳 栊 櫸 榉 +櫺 棂 櫻 樱 欄 栏 欅 榉 +欇 𪳍 權 权 欍 𣐤 欏 椤 +欐 𪲔 +欑 𪴙 欒 栾 欓 𣗋 欖 榄 @@ -948,6 +1092,7 @@ 殼 壳 毀 毁 毆 殴 +毊 𪵑 毿 毵 氂 牦 氈 毡 @@ -973,6 +1118,7 @@ 涂 涂 涇 泾 涌 涌 +涗 涚 涼 凉 淀 淀 淒 凄 @@ -998,6 +1144,7 @@ 溈 沩 準 准 溝 沟 +溡 𪶄 溫 温 溮 浉 溳 涢 @@ -1031,6 +1178,7 @@ 潔 洁 潙 沩 潛 潜 +潣 𫞗 潤 润 潯 浔 潰 溃 @@ -1046,6 +1194,7 @@ 澤 泽 澦 滪 澩 泶 +澬 𫞚 澮 浍 澱 淀 澾 㳠 @@ -1066,6 +1215,7 @@ 濺 溅 濼 泺 濾 滤 +濿 𪵱 瀂 澛 瀃 𣽷 瀅 滢 @@ -1082,12 +1232,14 @@ 瀦 潴 瀧 泷 瀨 濑 -瀰 弥 +瀰 弥 㳽 瀲 潋 瀾 澜 灃 沣 灄 滠 +灍 𫞝 灑 洒 +灒 𪷽 灕 漓 灘 滩 灙 𣺼 @@ -1103,6 +1255,7 @@ 烏 乌 烴 烃 無 无 +煇 𪸩 煉 炼 煒 炜 煙 烟 @@ -1111,6 +1264,7 
@@ 煩 烦 煬 炀 煱 㶽 +熂 𪸕 熅 煴 熉 𤈶 熌 𤇄 @@ -1137,10 +1291,13 @@ 燻 熏 燼 烬 燾 焘 +爃 𫞡 爄 𤇃 爍 烁 爐 炉 爛 烂 +爥 𪹳 +爧 𫞠 爭 争 爲 为 爺 爷 @@ -1151,11 +1308,14 @@ 牴 牴 抵 牽 牵 犖 荦 +犛 牦 +犞 𪺭 犢 犊 犧 牺 狀 状 狹 狭 狽 狈 +猌 𪺽 猙 狰 猶 犹 猻 狲 @@ -1163,6 +1323,7 @@ 獃 呆 獄 狱 獅 狮 +獊 𪺷 獎 奖 獨 独 獪 狯 @@ -1179,6 +1340,7 @@ 獼 猕 玀 猡 玁 𤞤 +珼 𫞥 現 现 琱 雕 琺 珐 @@ -1190,15 +1352,23 @@ 瑩 莹 瑪 玛 瑲 玱 +瑻 𪻲 瑽 𪻐 璉 琏 +璊 𫞩 +璝 𪻺 璡 琎 璣 玑 璦 瑷 璫 珰 璯 㻅 環 环 +璵 玙 +璸 瑸 +璼 𫞨 璽 玺 +璾 𫞦 +瓄 𪻨 瓊 琼 瓏 珑 瓔 璎 @@ -1222,6 +1392,7 @@ 症 症 痙 痉 痠 酸 +痮 𪽪 痾 疴 瘂 痖 瘋 疯 @@ -1231,6 +1402,7 @@ 瘡 疮 瘧 疟 瘮 瘆 +瘱 𪽷 瘲 疭 瘺 瘘 瘻 瘘 @@ -1267,6 +1439,7 @@ 監 监 盤 盘 盧 卢 +盨 𪾔 盪 荡 眞 真 眥 眦 @@ -1284,6 +1457,8 @@ 瞶 瞆 瞼 睑 矇 蒙 +矉 𪾸 +矑 𪾦 矓 眬 矚 瞩 矩 矩 @@ -1318,6 +1493,7 @@ 礪 砺 礫 砾 礬 矾 +礮 𪿫 礱 砻 祇 祇 只 祕 秘 @@ -1345,6 +1521,7 @@ 種 种 稱 称 穀 谷 +穇 䅟 穌 稣 積 积 穎 颖 @@ -1368,6 +1545,7 @@ 竈 灶 竊 窃 竪 竖 +竱 𫁟 競 竞 筆 笔 筍 笋 @@ -1387,10 +1565,12 @@ 篩 筛 篳 筚 簀 箦 +簂 𫂆 簍 篓 簑 蓑 簞 箪 簡 简 +簢 𫂃 簣 篑 簫 箫 簹 筜 @@ -1412,6 +1592,7 @@ 籮 箩 籲 吁 粵 粤 +糉 粽 糝 糁 糞 粪 糧 粮 @@ -1420,6 +1601,7 @@ 糴 籴 糶 粜 糹 纟 +糺 𫄙 系 系 糾 纠 紀 纪 @@ -1444,6 +1626,7 @@ 紛 纷 紜 纭 紝 纴 +紟 𫄛 紡 纺 紬 䌷 紮 扎 @@ -1458,20 +1641,25 @@ 紼 绋 紿 绐 絀 绌 +絁 𫄟 終 终 絃 弦 組 组 絅 䌹 絆 绊 +絍 𫟃 絎 绗 結 结 絕 绝 +絙 𫄠 絛 绦 絝 绔 絞 绞 絡 络 絢 绚 +絥 𫄢 給 给 +絧 𫄡 絨 绒 絰 绖 統 统 @@ -1487,14 +1675,18 @@ 綇 𦈋 綈 绨 綉 绣 +綋 𫟄 綌 绤 綏 绥 綐 䌼 綑 捆 經 经 +綖 𫄧 綜 综 綞 缍 +綟 𫄫 綠 绿 +綡 𫟅 綢 绸 綣 绻 綫 线 @@ -1526,16 +1718,19 @@ 緗 缃 緘 缄 緙 缂 -線 线 +線 线 缐 緝 缉 緞 缎 +緟 𫟆 締 缔 緡 缗 緣 缘 +緤 𫄬 緦 缌 編 编 緩 缓 緬 缅 +緮 𫄭 緯 纬 緰 𦈕 緱 缑 @@ -1546,10 +1741,12 @@ 緸 𦈑 緹 缇 緻 致 +緼 缊 縈 萦 縉 缙 縊 缢 縋 缒 +縍 𫄰 縎 𦈔 縐 绉 縑 缣 @@ -1565,6 +1762,7 @@ 縬 𦈚 縭 缡 縮 缩 +縰 𫄳 縱 纵 縲 缧 縳 䌸 @@ -1572,13 +1770,16 @@ 縵 缦 縶 絷 縷 缕 +縸 𫄲 縹 缥 縺 𦈐 總 总 績 绩 +繂 𫄴 繃 绷 繅 缫 繆 缪 +繈 𫄶 繏 𦈝 繐 穗 繒 缯 @@ -1590,14 +1791,18 @@ 繟 𦈎 繡 绣 繢 缋 +繨 𫄤 繩 绳 繪 绘 繫 系 +繬 𫄱 繭 茧 繮 缰 繯 缳 繰 缲 繳 缴 +繶 𫄷 +繷 𫄣 繸 䍁 繹 绎 繻 𦈡 @@ -1615,7 +1820,9 @@ 纓 缨 纔 才 纖 纤 +纗 𫄹 纘 缵 +纚 𫄥 纜 缆 缽 钵 罈 坛 @@ -1632,6 +1839,7 @@ 羥 羟 羨 羡 義 义 +羵 𫅗 羶 膻 習 习 翬 翚 @@ -1649,6 +1857,7 @@ 聶 聂 職 职 聹 聍 +聻 𫆏 聽 听 聾 聋 肅 肃 @@ -1676,9 +1885,11 @@ 膃 腽 膕 腘 膚 肤 +膞 䏝 膠 胶 膢 𦝼 膩 腻 +膹 𪱥 膽 胆 膾 脍 膿 脓 @@ -1729,15 +1940,19 @@ 葉 叶 葒 荭 著 著 +葝 𫈎 葤 荮 葦 苇 葯 药 葷 荤 +蒍 𫇭 蒐 搜 蒓 莼 蒔 莳 +蒕 蒀 蒙 蒙 蒞 莅 +蒭 𫇴 蒼 苍 蓀 荪 
蓆 席 @@ -1754,6 +1969,8 @@ 蔥 葱 蔦 茑 蔭 荫 +蔯 𫈟 +蔿 𫇭 蕁 荨 蕆 蒇 蕎 荞 @@ -1761,12 +1978,16 @@ 蕓 芸 蕕 莸 蕘 荛 +蕝 𫈵 蕢 蒉 蕩 荡 蕪 芜 蕭 萧 +蕳 𫈉 蕷 蓣 +蕽 𫇽 薀 蕰 +薆 𫉁 薈 荟 薊 蓟 薌 芗 @@ -1787,10 +2008,13 @@ 藝 艺 藥 药 藪 薮 +藭 䓖 藴 蕴 藶 苈 +藷 𫉄 藹 蔼 藺 蔺 +蘀 萚 蘄 蕲 蘆 芦 蘇 苏 @@ -1817,6 +2041,7 @@ 蝕 蚀 蝟 猬 蝦 虾 +蝨 虱 蝸 蜗 螄 蛳 螞 蚂 @@ -1824,13 +2049,17 @@ 螮 䗖 螻 蝼 螿 螀 +蟂 𫋇 蟄 蛰 蟈 蝈 蟎 螨 +蟘 𫋌 +蟜 𫊸 蟣 虮 蟬 蝉 蟯 蛲 蟲 虫 +蟳 𫊻 蟶 蛏 蟻 蚁 蠁 蚃 @@ -1839,8 +2068,10 @@ 蠍 蝎 蠐 蛴 蠑 蝾 +蠔 蚝 蠟 蜡 蠣 蛎 +蠦 𫊮 蠨 蟏 蠱 蛊 蠶 蚕 @@ -1873,15 +2104,19 @@ 襇 裥 襉 裥 襏 袯 +襓 𫋹 襖 袄 +襗 𫋷 +襘 𫋻 襝 裣 襠 裆 襤 褴 襪 袜 -襬 摆 +襬 摆 䙓 襯 衬 襲 袭 襴 襕 +襵 𫌇 覆 覆 复 覈 核 見 见 @@ -1890,6 +2125,7 @@ 覓 觅 視 视 覘 觇 +覛 𫌪 覡 觋 覥 觍 覦 觎 @@ -1898,6 +2134,7 @@ 覯 觏 覲 觐 覷 觑 +覹 𫌭 覺 觉 覼 𫌨 覽 览 @@ -1922,11 +2159,14 @@ 託 托 讬 記 记 訛 讹 +訜 𫍛 訝 讶 +訞 𫍚 訟 讼 訢 䜣 訣 诀 訥 讷 +訨 𫟞 訩 讻 訪 访 設 设 @@ -1935,13 +2175,16 @@ 訶 诃 診 诊 註 注 +証 证 詀 𧮪 詁 诂 詆 诋 +詊 𫟟 詎 讵 詐 诈 -詑 𫍟 +詑 𫍡 詒 诒 +詓 𫍜 詔 诏 評 评 詖 诐 @@ -1964,12 +2207,15 @@ 該 该 詳 详 詵 诜 +詷 𫍣 詼 诙 詿 诖 +誂 𫍥 誄 诔 誅 诛 誆 诓 誇 夸 +誋 𫍪 誌 志 認 认 誑 诳 @@ -1986,11 +2232,16 @@ 誦 诵 誨 诲 說 说 +誫 𫍨 説 说 誰 谁 課 课 +誳 𫍮 +誴 𫟡 誶 谇 +誷 𫍬 誹 诽 +誺 𫍧 誼 谊 誾 訚 調 调 @@ -2011,15 +2262,19 @@ 諞 谝 諡 谥 諢 诨 +諣 𫍩 諤 谔 +諥 𫍳 諦 谛 諧 谐 -諫 谏 +諫 谏 𫍝 諭 谕 諮 咨 谘 +諯 𫍱 諰 𫍰 諱 讳 諳 谙 +諴 𫍯 諶 谌 諷 讽 諸 诸 @@ -2031,6 +2286,8 @@ 謂 谓 謄 誊 謅 诌 +謆 𫍸 +謉 𫍷 謊 谎 謎 谜 謏 𫍲 @@ -2048,21 +2305,29 @@ 謫 谪 謬 谬 謭 谫 +謯 𫍹 +謱 𫍴 謳 讴 +謸 𫍵 謹 谨 謾 谩 譁 哗 +譂 𫟠 譅 䜧 +譆 𫍻 證 证 譊 𫍢 譎 谲 譏 讥 +譑 𫍤 譖 谮 識 识 譙 谯 譚 谭 譜 谱 +譞 𫍽 譟 噪 +譨 𫍦 譫 谵 譭 毁 譯 译 @@ -2071,8 +2336,9 @@ 護 护 譸 诪 譽 誉 -譾 谫 +譾 谫 𫍿 讀 读 +讅 谉 變 变 讋 詟 讌 䜩 @@ -2090,8 +2356,10 @@ 豐 丰 豔 艳 豬 猪 +豵 𫎆 豶 豮 貓 猫 +貗 𫎌 貙 䝙 貝 贝 貞 贞 @@ -2111,7 +2379,7 @@ 貳 贰 貴 贵 貶 贬 -買 买 +買 买 𧹒 貸 贷 貺 贶 費 费 @@ -2134,6 +2402,7 @@ 賙 赒 賚 赉 賜 赐 +賝 𫎩 賞 赏 賟 𧹖 賠 赔 @@ -2160,15 +2429,18 @@ 贅 赘 贇 赟 贈 赠 +贉 𫎫 贊 赞 贋 赝 贍 赡 贏 赢 贐 赆 +贑 𫎬 贓 赃 贔 赑 贖 赎 贗 赝 +贚 𫎦 贛 赣 贜 赃 赬 赪 @@ -2182,10 +2454,12 @@ 踰 逾 踴 踊 蹌 跄 +蹔 𫏐 蹕 跸 蹟 迹 蹣 蹒 蹤 踪 +蹳 𫏆 蹺 跷 蹻 𫏋 躂 跶 @@ -2214,19 +2488,24 @@ 軑 轪 軒 轩 軔 轫 +軕 𫐅 軗 𨐅 軛 轭 +軜 𫐇 軟 软 軤 轷 軨 𫐉 軫 轸 +軬 𫐊 軲 轱 +軷 𫐈 軸 轴 軹 轵 軺 轺 軻 轲 軼 轶 軾 轼 +軿 𫐌 較 较 輄 𨐈 輅 辂 @@ -2234,26 +2513,32 @@ 輈 辀 載 载 輊 轾 +輋 𪨶 輒 辄 輓 挽 輔 辅 輕 轻 +輖 𫐏 輗 𫐐 輛 辆 輜 辎 輝 辉 輞 辋 輟 辍 +輢 𫐎 輥 辊 輦 辇 +輨 𫐑 輩 辈 輪 轮 輬 辌 輮 𫐓 輯 
辑 輳 辏 +輷 𫐒 輸 输 輻 辐 +輼 辒 輾 辗 輿 舆 轀 辒 @@ -2261,11 +2546,16 @@ 轄 辖 轅 辕 轆 辘 +轇 𫐖 轉 转 +轊 𫐕 轍 辙 轎 轿 +轐 𫐗 轔 辚 +轗 𫐘 轟 轰 +轠 𫐙 轡 辔 轢 轹 轣 𫐆 @@ -2294,6 +2584,7 @@ 遠 远 遡 溯 適 适 +遱 𫐷 遲 迟 遷 迁 選 选 @@ -2313,10 +2604,12 @@ 鄒 邹 鄔 邬 鄖 郧 +鄟 𫑘 鄧 邓 鄭 郑 鄰 邻 鄲 郸 +鄳 𫑡 鄴 邺 鄶 郐 鄺 邝 @@ -2330,8 +2623,8 @@ 醣 糖 醫 医 醬 酱 -醯 醯 醱 酦 +醶 𫑷 釀 酿 釁 衅 釃 酾 @@ -2347,12 +2640,16 @@ 釗 钊 釘 钉 釙 钋 +釚 𫟲 針 针 +釟 𫓥 釣 钓 釤 钐 釦 扣 釧 钏 +釨 𫓦 釩 钒 +釲 𫟳 釳 𨰿 釵 钗 釷 钍 @@ -2363,6 +2660,8 @@ 鈁 钫 鈃 钘 鈄 钭 +鈅 钥 +鈆 𫓪 鈇 𫓧 鈈 钚 鈉 钠 @@ -2373,9 +2672,13 @@ 鈑 钣 鈒 钑 鈔 钞 -鈕 钮 扭 +鈕 钮 纽 +鈖 𫟴 +鈗 𫟵 +鈛 𫓨 鈞 钧 鈠 𨱁 +鈡 钟 鈣 钙 鈥 钬 鈦 钛 @@ -2396,30 +2699,42 @@ 鉀 钾 鉁 𨱅 鉅 巨 钜 +鉆 钻 鉈 铊 鉉 铉 鉋 铇 鉍 铋 鉑 铂 +鉔 𫓬 鉕 钷 鉗 钳 鉚 铆 鉛 铅 +鉝 𫟷 鉞 钺 +鉠 𫓭 鉢 钵 鉤 钩 鉦 钲 鉬 钼 鉭 钽 鉶 铏 +鉷 𫟹 鉸 铰 鉺 铒 鉻 铬 +鉽 𫟸 +鉾 𫓴 鉿 铪 銀 银 +銁 𫓲 +銂 𫟻 銃 铳 銅 铜 +銈 𫓯 +銊 𫓰 銍 铚 +銏 𫟶 銑 铣 銓 铨 銖 铢 @@ -2451,9 +2766,11 @@ 鋌 铤 鋏 铗 鋒 锋 +鋗 𫓶 鋙 铻 鋝 锊 鋟 锓 +鋠 𫓵 鋣 铘 鋤 锄 鋥 锃 @@ -2483,10 +2800,14 @@ 錙 锱 錚 铮 錛 锛 +錜 𫓻 +錝 𫓽 錟 锬 錠 锭 錡 锜 錢 钱 +錤 𫓹 +錥 𫓾 錦 锦 錨 锚 錩 锠 @@ -2497,6 +2818,7 @@ 錳 锰 錶 表 錸 铼 +錽 𫓸 鍀 锝 鍁 锨 鍃 锪 @@ -2504,9 +2826,11 @@ 鍆 钔 鍇 锴 鍈 锳 -鍊 炼 链 +鍉 𫔂 +鍊 炼 链 𫔀 鍋 锅 鍍 镀 +鍒 𫔄 鍔 锷 鍘 铡 鍚 钖 @@ -2526,14 +2850,18 @@ 鎂 镁 鎄 锿 鎇 镅 +鎈 𫟿 鎊 镑 鎌 镰 +鎍 𫔅 鎔 镕 鎖 锁 鎘 镉 +鎙 𫔈 鎚 锤 鎛 镈 鎝 𨱏 +鎞 𫔇 鎡 镃 鎢 钨 鎣 蓥 @@ -2542,7 +2870,7 @@ 鎩 铩 鎪 锼 鎬 镐 -鎭 鎮 +鎭 镇 鎮 镇 鎯 𨱍 鎰 镒 @@ -2571,6 +2899,7 @@ 鏡 镜 鏢 镖 鏤 镂 +鏥 𫔊 鏦 𫓩 鏨 錾 鏰 镚 @@ -2579,8 +2908,11 @@ 鏹 镪 鏺 䥽 鏽 锈 +鏾 𫔌 鐃 铙 鐄 𨱑 +鐇 𫔍 +鐈 𫓱 鐋 铴 鐍 𫔎 鐎 𨱓 @@ -2589,6 +2921,7 @@ 鐒 铹 鐓 镦 鐔 镡 +鐗 锏 鐘 钟 鐙 镫 鐝 镢 @@ -2597,6 +2930,7 @@ 鐦 锎 鐧 锏 鐨 镄 +鐪 𫓺 鐫 镌 鐮 镰 鐯 䦃 @@ -2606,8 +2940,11 @@ 鐶 镮 鐸 铎 鐺 铛 +鐼 𫔁 +鐽 𫟼 鐿 镱 鑄 铸 +鑉 𫠁 鑊 镬 鑌 镔 鑑 鉴 @@ -2622,13 +2959,15 @@ 鑰 钥 鑱 镵 鑲 镶 +鑴 𫔔 鑷 镊 鑹 镩 鑼 锣 鑽 钻 鑾 銮 鑿 凿 -钁 镢 +钁 镢 䦆 +钂 镋 镟 旋 長 长 門 门 @@ -2637,17 +2976,20 @@ 閆 闫 閈 闬 閉 闭 -開 开 +開 开 𫔭 閌 闶 閍 𨸂 閎 闳 閏 闰 閐 𨸃 閑 闲 -閒 闲 +閒 闲 𫔮 間 间 閔 闵 +閗 𫔯 閘 闸 +閝 𫠂 +閞 𫔰 閡 阂 閣 阁 閤 合 @@ -2659,6 +3001,7 @@ 閭 闾 閱 阅 閲 阅 +閵 𫔴 閶 阊 閹 阉 閻 阎 @@ -2675,6 +3018,7 @@ 闌 阑 闍 阇 闐 阗 +闑 𫔶 闒 阘 闓 闿 闔 阖 @@ -2720,22 +3064,27 @@ 雲 云 電 电 霢 霡 +霣 𫕥 霧 雾 +霼 𪵣 霽 霁 靂 雳 靄 霭 +靆 叇 靈 灵 靉 叆 靚 靓 靜 静 面 面 -靦 腼 +靦 腼 䩄 +靧 𫖃 靨 靥 鞀 鼗 鞏 巩 鞝 绱 鞦 秋 鞽 鞒 +鞾 𫖇 韁 缰 韃 鞑 韆 千 @@ -2745,9 +3094,12 @@ 韍 韨 韓 韩 韙 韪 +韚 𫠅 +韛 𫖔 
韜 韬 -韝 鞲 +韝 鞲 𫖕 韞 韫 +韠 𫖒 韻 韵 響 响 頁 页 @@ -2759,6 +3111,7 @@ 須 须 頊 顼 頌 颂 +頍 𫠆 頎 颀 頏 颃 預 预 @@ -2771,17 +3124,20 @@ 頡 颉 頤 颐 頦 颏 +頫 𫖯 頭 头 頮 颒 頰 颊 頲 颋 頴 颕 +頵 𫖳 頷 颔 頸 颈 頹 颓 頻 频 頽 颓 顃 𩖖 +顅 𫖶 顆 颗 題 题 額 额 @@ -2790,11 +3146,13 @@ 顒 颙 顓 颛 顔 颜 -願 愿 +顗 𫖮 +願 愿 𫖸 顙 颡 顛 颠 類 类 顢 颟 +顣 𫖹 顥 颢 顧 顾 顫 颤 @@ -2822,11 +3180,13 @@ 飄 飘 飆 飙 飈 飚 +飋 𫗋 飛 飞 飠 饣 飢 饥 飣 饤 飥 饦 +飦 𫗞 飩 饨 飪 饪 飫 饫 @@ -2835,6 +3195,8 @@ 飱 飧 飲 饮 飴 饴 +飵 𫗢 +飶 𫗣 飼 饲 飽 饱 飾 饰 @@ -2861,11 +3223,15 @@ 餞 饯 餡 馅 餦 𫗠 +餧 𫗪 館 馆 +餪 𫗬 +餫 𫗥 +餬 糊 𫗫 餭 𫗮 -餱 糇 +餱 糇 𫗯 餳 饧 -餵 喂 +餵 喂 𫗭 餶 馉 餷 馇 餸 𩠌 @@ -2887,16 +3253,21 @@ 饘 𫗴 饜 餍 饞 馋 +饟 𫗵 +饠 𫗩 饢 馕 馬 马 馭 驭 馮 冯 +馯 𫘛 馱 驮 馳 驰 馴 驯 馹 驲 +馼 𫘜 駁 驳 駃 𫘝 +駊 𫘟 駎 𩧨 駐 驻 駑 驽 @@ -2908,11 +3279,14 @@ 駚 𩧫 駛 驶 駝 驼 +駞 𫘞 駟 驷 駡 骂 駢 骈 +駤 𫘠 駧 𩧲 駩 𩧴 +駫 𫘡 駭 骇 駰 骃 駱 骆 @@ -2923,7 +3297,10 @@ 騁 骋 騂 骍 騃 𫘤 +騄 𫘧 騅 骓 +騉 𫘥 +騊 𫘦 騌 骔 騍 骒 騎 骑 @@ -2932,6 +3309,7 @@ 騖 骛 騙 骗 騚 𩨊 +騜 𫘩 騝 𩨃 騟 𩨈 騠 𫘨 @@ -2942,23 +3320,30 @@ 騭 骘 騮 骝 騰 腾 +騱 𫘬 +騴 𫘫 +騵 𫘪 騶 驺 騷 骚 騸 骟 +騻 𫘭 +騼 𫠋 騾 骡 驀 蓦 驁 骜 驂 骖 驃 骠 -驄 骢 +驄 骢 𩨂 驅 驱 驊 骅 驋 𩧯 驌 骕 驍 骁 驏 骣 +驓 𫘯 驕 骄 驗 验 +驙 𫘰 驚 惊 驛 驿 驟 骤 @@ -2966,6 +3351,7 @@ 驤 骧 驥 骥 驦 骦 +驨 𫘱 驪 骊 驫 骉 骯 肮 @@ -2978,6 +3364,7 @@ 鬆 松 鬍 胡 鬚 须 +鬠 𫘽 鬢 鬓 鬥 斗 鬧 闹 @@ -2993,14 +3380,19 @@ 魟 𫚉 魢 鱾 魥 𩽹 +魦 𫚌 魨 鲀 魯 鲁 魴 鲂 +魵 𫚍 魷 鱿 魺 鲄 +魽 𫠐 鮁 鲅 鮃 鲆 鮄 𫚒 +鮅 𫚑 +鮆 𫚖 鮊 鲌 鮋 鲉 鮍 鲏 @@ -3016,16 +3408,20 @@ 鮞 鲕 鮟 𩽾 鮣 䲟 +鮤 𫚓 鮦 鲖 鮪 鲔 鮫 鲛 鮭 鲑 鮮 鲜 +鮯 𫚗 鮰 𫚔 鮳 鲓 +鮵 𫚛 鮶 鲪 鮸 𩾃 鮺 鲝 +鮿 𫚚 鯀 鲧 鯁 鲠 鯄 𩾁 @@ -3040,6 +3436,7 @@ 鯗 鲞 鯛 鲷 鯝 鲴 +鯞 𫚡 鯡 鲱 鯢 鲵 鯤 鲲 @@ -3047,12 +3444,14 @@ 鯨 鲸 鯪 鲮 鯫 鲰 +鯬 𫚞 鯰 鲶 鯱 𩾇 鯴 鲺 鯶 𩽼 鯷 鳀 鯽 鲫 +鯾 𫚣 鯿 鳊 鰁 鳈 鰂 鲗 @@ -3060,21 +3459,27 @@ 鰆 䲠 鰈 鲽 鰉 鳇 +鰋 𫚢 鰌 䲡 鰍 鳅 鰏 鲾 鰐 鳄 +鰑 𫚊 鰒 鳆 鰓 鳃 +鰕 𫚥 +鰛 鳁 鰜 鳒 鰟 鳑 鰠 鳋 鰣 鲥 鰤 𫚕 鰥 鳏 +鰦 𫚤 鰧 䲢 鰨 鳎 鰩 鳐 +鰫 𫚦 鰭 鳍 鰮 鳁 鰱 鲢 @@ -3086,12 +3491,16 @@ 鰺 鲹 鰻 鳗 鰼 鳛 +鰽 𫚧 鰾 鳔 鱂 鳉 +鱄 𫚋 鱅 鳙 +鱆 𫠒 鱇 𩾌 鱈 鳕 鱉 鳖 +鱊 𫚪 鱒 鳟 鱔 鳝 鱖 鳜 @@ -3100,6 +3509,7 @@ 鱝 鲼 鱟 鲎 鱠 鲙 +鱢 𫚫 鱣 鳣 鱤 鳡 鱧 鳢 @@ -3107,6 +3517,7 @@ 鱭 鲚 鱮 𫚈 鱯 鳠 +鱲 𫚭 鱷 鳄 鱸 鲈 鱺 鲡 @@ -3120,12 +3531,17 @@ 鳶 鸢 鳷 𫛛 鳼 𪉃 +鳽 𫛚 鳾 䴓 +鴀 𫛜 鴃 𫛞 +鴅 𫛝 鴆 鸩 鴇 鸨 鴉 鸦 +鴐 𫛤 鴒 鸰 +鴔 𫛡 鴕 鸵 鴗 𫁡 鴛 鸳 @@ -3134,18 +3550,23 @@ 鴞 鸮 鴟 鸱 鴣 鸪 +鴥 𫛣 鴦 鸯 鴨 鸭 +鴮 𫛦 鴯 鸸 鴰 鸹 鴲 𪉆 +鴳 𫛩 鴴 鸻 鴷 䴕 鴻 鸿 +鴽 𫛪 鴿 鸽 鵁 䴔 鵂 鸺 鵃 鸼 +鵊 𫛥 鵐 鹀 鵑 鹃 鵒 鹆 @@ 
-3153,13 +3574,17 @@ 鵚 𪉍 鵜 鹈 鵝 鹅 +鵟 𫛭 鵠 鹄 鵡 鹉 +鵧 𫛨 +鵩 𫛳 鵪 鹌 +鵫 𫛱 鵬 鹏 鵮 鹐 鵯 鹎 -鵰 雕 +鵰 雕 𫛲 鵲 鹊 鵷 鹓 鵾 鹍 @@ -3167,6 +3592,7 @@ 鶇 鸫 鶉 鹑 鶊 鹒 +鶌 𫛵 鶒 𫛶 鶓 鹋 鶖 鹙 @@ -3175,10 +3601,13 @@ 鶚 鹗 鶡 鹖 鶥 鹛 +鶦 𫛷 鶩 鹜 鶪 䴗 鶬 鸧 +鶭 𫛯 鶯 莺 +鶰 𫛫 鶲 鹟 鶴 鹤 鶹 鹠 @@ -3190,21 +3619,29 @@ 鷁 鹢 鷂 鹞 鷄 鸡 +鷅 𫛽 鷈 䴘 +鷉 䴘 鷊 鹝 +鷐 𫜀 鷓 鹧 鷔 𪉑 鷖 鹥 鷗 鸥 鷙 鸷 鷚 鹨 +鷣 𫜃 +鷤 𫛴 鷥 鸶 鷦 鹪 鷨 𪉊 +鷩 𫜁 鷫 鹔 鷯 鹩 鷲 鹫 鷳 鹇 +鷴 鹇 +鷷 𫜄 鷸 鹬 鷹 鹰 鷺 鹭 @@ -3212,10 +3649,12 @@ 鷿 䴙 鸂 㶉 鸇 鹯 +鸊 䴙 鸋 𫛢 鸌 鹱 鸏 鹲 鸕 鸬 +鸗 𫛟 鸘 鹴 鸚 鹦 鸛 鹳 @@ -3230,13 +3669,15 @@ 麥 麦 麨 𪎊 麩 麸 -麪 面 +麪 面 麺 麫 面 +麬 𤿲 麯 曲 麲 𪎉 麳 𪎌 -麴 曲 -麵 面 +麴 曲 麹 +麵 面 麺 +麷 𫜑 麼 么 麽 麽 么 麽 黃 黄 @@ -3268,106 +3709,211 @@ 齡 龄 齣 出 齦 龈 -齧 啮 +齧 啮 𫜩 +齩 𫜪 齪 龊 齬 龉 +齭 𫜭 +齯 𫠜 +齰 𫜬 齲 龋 +齴 𫜮 齶 腭 齷 龌 +齾 𫜰 龍 龙 龎 厐 龐 庞 龑 䶮 +龓 𫜲 龔 龚 龕 龛 龜 龟 龭 𩨎 龯 𨱆 +鿁 䜤 𠌥 𠆿 𠏢 𠉗 +𠐊 𫝋 𠞆 𠛆 𠠎 𠚳 +𠬙 𪠡 +𡂡 𪢒 +𡃄 𪡺 +𡃤 𪢐 𡄔 𠴢 𡄣 𠵸 𡅏 𠲥 𡑭 𡋗 +𡓁 𪤄 𡓾 𡋀 𡞵 㛟 +𡟫 𫝪 𡠹 㛿 +𡡎 𡞱 𡢃 㛠 𡮉 𡭜 𡮣 𡭬 +𡸗 𪨩 +𡹬 𪨹 𡻕 岁 𡾱 㟜 +𢍰 𪪴 +𢣐 𪬚 𢣚 𢘝 𢣭 𢘞 +𢤩 𪫡 +𢤿 𪬯 +𢯷 𪭝 +𢶒 𪭯 𢶫 𢫞 +𢷬 𢭏 𢷮 𢫊 𢹿 𢬦 +𢺳 𪮳 +𣍐 𠊉 𣙎 㭣 𣝕 𣘷 𣞻 𣘓 𣠲 𣑶 𣯴 𣭤 +𣽏 𪶮 𣾷 㳢 𣿉 𣶫 𤁣 𣺽 +𤄷 𪶒 +𤑹 𪹀 𤒎 𤊀 +𤒻 𪹹 +𤓌 𪹠 +𤘀 𪺣 +𤛱 𫞢 +𤜆 𪺪 +𤠮 𪺸 +𤩂 𫞧 𤪺 㻘 𤫩 㻏 +𤬅 𪼴 +𤳷 𪽝 𤳸 𤳄 +𤷃 𪽭 𤸫 𤶧 +𤺔 𪽴 𥌃 𥅘 +𥏝 𪿊 𥕥 𥐰 𥖅 𥐯 +𥖲 𪿞 +𥗇 𪿵 +𥜐 𫀓 +𥜰 𫀌 𥢢 䅪 +𥢶 𫞷 +𥢷 𫀮 𥨐 𥧂 +𥯤 𫁳 +𥴨 𫂖 +𥴼 𫁺 𥵃 𥱔 𥵊 𥭉 𥸠 𥮋 +𥻦 𫂿 𥼽 𥹥 𥽖 𥺇 +𥾯 𫄝 𥿊 𦈈 +𦀖 𫄦 𦂅 𦈒 𦃄 𦈗 +𦃩 𫄯 +𦅇 𫄪 +𦅈 𫄵 +𦆲 𫟇 +𦒀 𫅥 +𦔖 𫅼 +𦟼 𫆝 +𦠅 𫞅 +𦡝 𫆫 𦢈 𣍨 𦣎 𦟗 +𦧺 𫇘 𦪙 䑽 𦪽 𦨩 +𧒯 𫊹 𧔥 𧒭 𧜗 䘞 𧜵 䙊 𧝞 䘛 +𧞫 𫌋 +𧟀 𧝧 +𧡴 𫌫 +𧢄 𫌬 +𧦝 𫍞 𧦧 𫍟 +𧩕 𫍭 𧩙 䜥 +𧩼 𫍶 +𧫝 𫍺 +𧬤 𫍼 +𧭈 𫍾 𧳟 𧳕 𧵳 䞌 𧶔 𧹓 𧶧 䞎 +𧷎 𪠀 +𧸘 𫎨 +𧹈 𪥠 +𧽯 𫎸 +𨂐 𫏌 𨄣 𨀱 𨅍 𨁴 +𨆪 𫏕 𨇁 𧿈 𨇞 𨅫 +𨇤 𫏨 +𨇰 𫏞 +𨇽 𫏑 𨈊 𨂺 𨈌 𨄄 𨊰 䢀 𨊸 䢁 𨊻 𨐆 𨋢 䢂 +𨌈 𫐍 +𨍰 𫐔 +𨎌 𫐋 𨎮 𨐉 𨏠 𨐇 𨏥 𨐊 +𨞺 𫟫 +𨟊 𫟬 𨤻 𨤰 𨥛 𨱀 +𨥟 𫓫 𨦫 䦀 𨧜 䦁 +𨧰 𫟽 𨧱 𨱊 +𨨛 𫓼 +𨨢 𫓿 +𨩰 𫟾 +𨪕 𫓮 𨫒 𨱐 +𨬖 𫔏 +𨭖 𫔑 +𨭸 𫔐 𨮂 𨱕 +𨮳 𫔒 𨯅 䥿 +𨯟 𫔓 +𨰃 𫔉 +𨰋 𫓳 +𨰥 𫔕 +𨰲 𫔃 𨳑 𨸁 𨳕 𨸀 𨴗 𨸅 +𨴹 𫔲 𨵩 𨸆 𨵸 𨸇 𨶀 𨸉 @@ -3375,12 +3921,26 @@ 𨶮 𨸌 𨶲 𨸋 𨷲 𨸎 +𨼳 𫔽 𨽏 𨸘 +𩀨 𫕚 +𩅙 𫕨 +𩎖 𫖑 𩎢 𩏾 +𩏂 𫖓 +𩏠 𫖖 𩏪 𩏽 +𩏷 𫃗 +𩒎 𫖭 𩓣 𩖕 +𩓥 𫖵 +𩔑 𫖷 +𩔳 𫖴 +𩖰 𫠇 𩗀 𩙦 +𩗓 𫗈 𩗡 𩙧 +𩗴 𫗉 𩘀 𩙩 𩘝 𩙭 𩘹 𩙨 @@ -3388,17 +3948,22 @@ 𩙈 𩙰 𩚛 𩟿 𩚥 𩠀 +𩚩 𫗡 𩚵 𩠁 𩛆 𩠂 +𩛌 𫗤 +𩛡 𫗨 𩛩 𩠃 𩜇 𩠉 𩜦 𩠆 𩜵 𩠊 𩝔 𩠋 +𩝽 𫗳 𩞄 𩠎 𩞦 𩠏 𩞯 䭪 𩟐 𩠅 +𩟗 𫗚 𩠴 𩠠 𩡺 𩧦 𩢡 𩧬 @@ -3407,6 +3972,7 @@ 𩢾 𩧮 𩣏 𩧶 𩣑 䯃 +𩣫 𩧸 𩣵 𩧻 𩣺 𩧼 𩤊 𩧩 @@ -3417,21 +3983,34 @@ 𩥇 𩨍 𩥉 𩧱 𩥑 𩨌 +𩦠 𫠌 𩧆 𩨐 𩭙 𩬣 +𩯁 𫙂 𩯳 𩯒 𩰀 𩬤 𩳤 𩲒 +𩵦 𫠏 𩵩 𩽺 𩵹 𩽻 +𩶁 𫚎 𩶘 䲞 𩶰 𩽿 𩶱 𩽽 𩷰 𩾄 𩸃 𩾅 +𩸄 𫚝 +𩸡 𫚟 𩸦 𩾆 +𩻗 𫚨 +𩻬 𫚩 +𩻮 𫚘 +𩼶 𫚬 𩽇 
𩾎 +𩿅 𫠖 +𩿤 𫛠 𩿪 𪉄 +𪀖 𫛧 𪀦 𪉅 𪀾 𪉋 𪁈 𪉉 @@ -3439,10 +4018,22 @@ 𪂆 𪉎 𪃍 𪉐 𪃏 𪉏 +𪃒 𫛻 +𪃧 𫛹 𪄆 𪉔 𪄕 𪉒 +𪅂 𫜂 +𪆷 𫛾 𪇳 𪉕 +𪈼 𪉓 +𪉸 𫜊 𪋿 𪎍 +𪌭 𫜓 +𪍠 𫜕 +𪓰 𫜟 𪔵 𪔭 𪘀 𪚏 𪘯 𪚐 +𪙏 𫜯 +𫒡 𫓷 +𫜦 𫜫 diff --git a/data/trad_to_simp/phrases.txt b/data/dictionary/TSPhrases.txt similarity index 82% rename from data/trad_to_simp/phrases.txt rename to data/dictionary/TSPhrases.txt index 7c6920c..03da389 100644 --- a/data/trad_to_simp/phrases.txt +++ b/data/dictionary/TSPhrases.txt @@ -1,6 +1,8 @@ 一目瞭然 一目了然 上鍊 上链 不瞭解 不了解 +么麼 幺麽 +么麽 幺麽 乾乾淨淨 干干净净 乾乾脆脆 干干脆脆 乾元 乾元 @@ -22,6 +24,7 @@ 乾縣 乾县 乾象 乾象 乾造 乾造 +乾道 乾道 乾陵 乾陵 乾隆 乾隆 乾隆年間 乾隆年间 @@ -42,17 +45,25 @@ 反覆思量 反复思量 反覆性 反复性 名覆金甌 名复金瓯 +哪吒 哪吒 回覆 回复 -壺裏乾坤 壸里乾坤 +壺裏乾坤 壶里乾坤 +大目乾連冥間救母變文 大目乾连冥间救母变文 宫商角徵羽 宫商角徵羽 射覆 射复 尼乾陀 尼乾陀 +幺麼 幺麽 +幺麼小丑 幺麽小丑 +幺麼小醜 幺麽小丑 康乾 康乾 張法乾 张法乾 彷彿 仿佛 彷徨 彷徨 徵弦 徵弦 徵絃 徵弦 +徵聲 徵声 +徵調 徵调 +徵音 徵音 情有獨鍾 情有独钟 情有独锺 憑藉 凭借 憑藉着 凭借着 @@ -64,21 +75,48 @@ 拜覆 拜复 據瞭解 据了解 文錦覆阱 文锦复阱 +於世成 於世成 +於乎 於乎 +於仲完 於仲完 +於倫 於伦 +於其一 於其一 +於則 於则 +於勇明 於勇明 +於呼哀哉 於呼哀哉 +於單 於单 +於坦 於坦 +於崇文 於崇文 +於忠祥 於忠祥 +於惟一 於惟一 +於戲 於戏 +於敖 於敖 +於梨華 於梨华 +於清言 於清言 +於琳 於琳 +於穆 於穆 +於竹屋 於竹屋 +於菟 於菟 +於邑 於邑 +於陵子 於陵子 +旋乾轉坤 旋乾转坤 旋轉乾坤 旋转乾坤 旋轉乾坤之力 旋转乾坤之力 明瞭 明了 明覆 明复 書中自有千鍾粟 书中自有千锺粟 朝乾夕惕 朝乾夕惕 +木吒 木吒 李乾德 李乾德 李澤鉅 李泽钜 李鍊福 李链福 李鍾郁 李锺郁 +樊於期 樊於期 沈沒 沉没 沈沒成本 沉没成本 沈積 沉积 沈船 沉船 沈默 沉默 +流徵 流徵 浪蕩乾坤 浪荡乾坤 滑藉 滑借 牴牾 抵牾 @@ -87,6 +125,7 @@ 珍珠項鍊 珍珠项链 甚鉅 甚钜 申覆 申复 +畢昇 毕昇 發覆 发复 盼既示覆 盼既示复 瞭如 了如 @@ -101,6 +140,7 @@ 神祇 神祇 稟覆 禀复 答覆 答复 +篤麼 笃麽 簡單明瞭 简单明了 籌畫 筹划 素藉 素借 @@ -181,6 +221,9 @@ 角徵 角徵 角徵羽 角徵羽 計畫 计划 +變徵 变徵 +變徵之聲 变徵之声 +變徵之音 变徵之音 貂覆額 貂复额 買臣覆水 买臣复水 踅門瞭戶 踅门了户 @@ -190,6 +233,7 @@ 酒逢知己千鍾少話不投機半句多 酒逢知己千锺少话不投机半句多 醞藉 酝借 重覆 重复 +金吒 金吒 金鍊 金链 鈕釦 纽扣 鈞覆 钧复 @@ -198,6 +242,7 @@ 鉅防 钜防 鉸鍊 铰链 銀鍊 银链 +錢鍾書 钱锺书 鍊墜 链坠 鍊子 链子 鍊形 链形 @@ -220,8 +265,9 @@ 項鍊 项链 頗覆 颇复 頸鍊 颈链 +顛乾倒坤 颠乾倒坤 顛倒乾坤 颠倒乾坤 顧藉 顾借 -麼麼 麽麽 +麼些族 麽些族 黄鍾公 黄锺公 龍鍾 龙钟 龙锺 diff --git a/data/tw/tw-it.txt b/data/dictionary/TWPhrasesIT.txt similarity index 92% rename from data/tw/tw-it.txt rename to data/dictionary/TWPhrasesIT.txt index dc4858b..6d0bacb 100644 --- a/data/tw/tw-it.txt +++ b/data/dictionary/TWPhrasesIT.txt @@ -41,6 +41,8 @@ 
SQL注入攻擊 SQL隱碼攻擊 光標 游標 光盤 光碟 光驅 光碟機 +免提 擴音 +內存 記憶體 內核 核心 內置 內建 內聯函數 行內函數 @@ -63,6 +65,7 @@ SQL注入攻擊 SQL隱碼攻擊 參數表 參數列 句柄 控制代碼 可視化 視覺化 +呼出 撥出 呼叫轉移 來電轉駁 命令式編程 指令式程式設計 命令行 命令列 @@ -71,11 +74,13 @@ SQL注入攻擊 SQL隱碼攻擊 單片機 微控制器 回調 回撥 圖像 影象 +圖庫 相簿 圖標 圖示 在線 線上 地址 地址 位址 地址欄 位址列 城域王 都會網路 +場效應管 場效電晶體 壁紙 桌布 壁紙 外置 外接 外鍵 外來鍵 @@ -88,6 +93,7 @@ SQL注入攻擊 SQL隱碼攻擊 字符集 字符集 字節 位元組 字體 字型 +存儲 儲存 存盤 存檔 宏內核 單核心 密鑰 金鑰 @@ -114,15 +120,19 @@ SQL注入攻擊 SQL隱碼攻擊 性價比 價效比 性能 效能 截取 擷取 +截屏 截圖 打印 列印 -打印机 印表機 +打印機 印表機 打開 開啟 打開 持久性 永續性 捲積 摺積 掃描儀 掃描器 +掛斷 結束通話 採樣 取樣 +採樣率 取樣率 接口 介面 控件 控制項 +搜索 搜尋 操作系統 作業系統 擴展名 副檔名 支持 支援 @@ -151,6 +161,7 @@ SQL注入攻擊 SQL隱碼攻擊 時分複用 分時多工 時鐘頻率 時脈頻率 晶閘管 閘流體 +晶體管 電晶體 智能 智慧 最終用戶 終端使用者 有損壓縮 有失真壓縮 @@ -158,6 +169,7 @@ SQL注入攻擊 SQL隱碼攻擊 本地代碼 原生代碼 析構函數 解構函式 枚舉 列舉 +查找 查詢 查看 檢視 桌面型 桌上型 構造函數 建構函式 @@ -173,6 +185,7 @@ SQL注入攻擊 SQL隱碼攻擊 比特率 位元率 波分複用 波長分波多工 消息 訊息 消息 +添加 新增 源代碼 原始碼 源文件 原始檔 溢出 溢位 @@ -180,7 +193,7 @@ SQL注入攻擊 SQL隱碼攻擊 激光 鐳射 激活 啟用 無損壓縮 無失真壓縮 -物理内存 實體記憶體 +物理內存 實體記憶體 物理地址 實體地址 狀態欄 狀態列 用戶 使用者 @@ -188,6 +201,7 @@ SQL注入攻擊 SQL隱碼攻擊 異步 非同步 登錄 登入 發佈 釋出 +發送 傳送 皮膚 面板 盤片 碟片 盤符 碟符 @@ -204,6 +218,8 @@ SQL注入攻擊 SQL隱碼攻擊 磁道 磁軌 社區 社羣 社區 移動硬盤 行動硬碟 +移動網絡 行動網路 +移動資料 行動資料 移動通信 行動通訊 移動電話 行動電話 程序 程式 @@ -232,9 +248,12 @@ SQL注入攻擊 SQL隱碼攻擊 縮進 縮排 總線 匯流排 缺省 預設 +聯繫 聯絡 +聯繫歷史 通話記錄 聲卡 音效卡 脫機 離線 腳本 指令碼 +自動轉屏 自動旋轉螢幕 臺式機 桌上型電腦 航天飛機 太空梭 芯片 晶片 @@ -250,6 +269,7 @@ SQL注入攻擊 SQL隱碼攻擊 視圖 檢視 視頻 視訊 解釋器 直譯器 +觸摸 觸控 觸摸屏 觸控式螢幕 計算機安全 電腦保安 計算機科學 電腦科學 @@ -257,6 +277,7 @@ SQL注入攻擊 SQL隱碼攻擊 設備 裝置 設置 設定 註冊機 序號產生器 +註冊表 登錄檔 註銷 登出 調制 調變 調度 排程 @@ -269,6 +290,7 @@ SQL注入攻擊 SQL隱碼攻擊 軟件 軟體 軟驅 軟碟機 通信 通訊 +通訊卡 通話卡 連接 連線 進制 進位制 進程 程序 進程 @@ -277,7 +299,6 @@ SQL注入攻擊 SQL隱碼攻擊 遠程 遠端 適配器 介面卡 邏輯門 邏輯閘 -采样率 取樣率 重命名 重新命名 重裝 重灌 重載 過載 @@ -293,6 +314,7 @@ SQL注入攻擊 SQL隱碼攻擊 隊列 佇列 集成 整合 集成電路 積體電路 +集羣 叢集 雲存儲 雲端儲存 雲計算 雲端計算 面向對象 物件導向 @@ -307,15 +329,16 @@ SQL注入攻擊 SQL隱碼攻擊 顯像管 映象管 顯卡 顯示卡 顯存 視訊記憶體 +飛行模式 飛航模式 首席信息官 資訊長 首席執行官 執行長 首席技術官 技術長 首席運營官 營運長 +高性能計算 高效能運算 高端 高階 進階 高級 高階 進階 高級 高速緩存 快取記憶體 +默認 預設 默認值 預設值 點擊 點選 鼠標 滑鼠 -晶體管 電晶體 -場效應管 場效電晶體 \ No newline at end of file diff --git 
a/data/tw/tw-name.txt b/data/dictionary/TWPhrasesName.txt similarity index 94% rename from data/tw/tw-name.txt rename to data/dictionary/TWPhrasesName.txt index 86a97cb..20c7ec5 100644 --- a/data/tw/tw-name.txt +++ b/data/dictionary/TWPhrasesName.txt @@ -15,7 +15,7 @@ 厄瓜多爾 厄瓜多 厄立特里亞 厄利垂亞 吉布堤 吉布地 -哈萨克斯坦 哈薩克 +哈薩克斯坦 哈薩克 哥斯達黎加 哥斯大黎加 圖瓦盧 吐瓦魯 土庫曼斯坦 土庫曼 @@ -33,14 +33,14 @@ 尼日爾 尼日 岡比亞 甘比亞 巴巴多斯 巴貝多 -巴布亞新幾内亚 巴布亞紐幾內亞 +巴布亞新幾內亞 巴布亞紐幾內亞 布基納法索 布吉納法索 布隆迪 蒲隆地 -帕劳 帛琉 +帕勞 帛琉 幾內亞比紹 幾內亞比索 意大利 義大利 所羅門羣島 索羅門羣島 -文莱 汶萊 +文萊 汶萊 斯威士蘭 史瓦濟蘭 斯洛文尼亞 斯洛維尼亞 新西蘭 紐西蘭 @@ -71,7 +71,7 @@ 肯尼亞 肯亞 莫桑比克 莫三比克 萊索托 賴索托 -蘇裏南 蘇利南 +蘇里南 蘇利南 貝寧 貝南 贊比亞 尚比亞 阿塞拜疆 亞塞拜然 diff --git a/data/dictionary/TWPhrasesOther.txt b/data/dictionary/TWPhrasesOther.txt new file mode 100644 index 0000000..2e44201 --- /dev/null +++ b/data/dictionary/TWPhrasesOther.txt @@ -0,0 +1,14 @@ +元音 母音 +出租車 計程車 +咖喱 咖哩 +奔馳 賓士 +奶酪 乳酪 +方便麵 速食麵 +涼菜 冷盤 +硅 矽 +詞組 片語 +蹦極 笨豬跳 +輔音 子音 +酰 醯 +鈈 鈽 +鐦 鉲 diff --git a/data/tw/to_tw_variants.txt b/data/dictionary/TWVariants.txt similarity index 66% rename from data/tw/to_tw_variants.txt rename to data/dictionary/TWVariants.txt index 73cea60..253bb9b 100644 --- a/data/tw/to_tw_variants.txt +++ b/data/dictionary/TWVariants.txt @@ -1,23 +1,30 @@ 僞 偽 兇 凶 啓 啟 -囱 囪 +嫺 嫻 嬀 媯 +峯 峰 +幺 么 棱 稜 污 汙 泄 洩 涌 湧 潙 溈 +潨 潀 爲 為 牀 床 +痹 痺 +癡 痴 着 著 +睾 睪 竈 灶 +糉 粽 +纔 才 羣 群 +蔿 蒍 衆 眾 裏 裡 +覈 核 踊 踴 -鷄 雞 +鮎 鯰 麪 麵 -麯 麴 -覈 核 -峯 峰 diff --git a/data/tw/from_tw_phrases.txt b/data/dictionary/TWVariantsRevPhrases.txt similarity index 98% rename from data/tw/from_tw_phrases.txt rename to data/dictionary/TWVariantsRevPhrases.txt index 77ef665..039fe9e 100644 --- a/data/tw/from_tw_phrases.txt +++ b/data/dictionary/TWVariantsRevPhrases.txt @@ -1,51 +1,51 @@ -合著 合著 -鉅著 鉅著 -巨著 巨著 -昭著 昭著 -顯著 顯著 -著錄 著錄 -著志 著志 -著稱 著稱 -著述 著述 -著書 著書 -著名 著名 -名著 名著 -著作 著作 -著者 著者 -著於 著於 -著白 著白 -著式 著式 -凶吉 凶吉 -吉凶 吉凶 -凶年 凶年 凶事 凶事 凶信 凶信 凶兆 凶兆 +凶吉 凶吉 凶地 凶地 凶多吉少 凶多吉少 凶宅 凶宅 -凶歲 凶歲 +凶年 凶年 凶德 凶德 凶怪 凶怪 凶日 凶日 凶服 凶服 +凶歲 凶歲 凶死 凶死 凶氣 凶氣 凶煞 凶煞 凶燄 凶燄 -凶禮 凶禮 凶神 凶神 -凶豎 凶豎 +凶禮 凶禮 凶耗 凶耗 凶肆 凶肆 凶荒 凶荒 凶訊 凶訊 
+凶豎 凶豎 凶身 凶身 凶逆 凶逆 凶門 凶門 +合著 合著 +吉凶 吉凶 +名著 名著 四凶 四凶 大凶 大凶 +巨著 巨著 +昭著 昭著 歲凶 歲凶 -閔凶 閔凶 +著作 著作 +著名 著名 +著式 著式 +著志 著志 +著於 著於 +著書 著書 +著白 著白 +著稱 著稱 +著者 著者 +著述 著述 +著錄 著錄 逢凶 逢凶 -避凶 避凶 \ No newline at end of file +避凶 避凶 +鉅著 鉅著 +閔凶 閔凶 +顯著 顯著 diff --git a/data/scripts/common.py b/data/scripts/common.py index 4d69d60..f94bca9 100644 --- a/data/scripts/common.py +++ b/data/scripts/common.py @@ -1,31 +1,34 @@ #coding: utf-8 +import codecs +import sys def sort_items(input_filename, output_filename): - input_file = open(input_filename, "r") + input_file = codecs.open(input_filename, "r", encoding="utf-8") dic = {} for line in input_file: - if len(line) == 0: + if len(line) == 0 or line == '\n': continue try: key, value = line.split("\t") except ValueError: - print line + print(line) while value[-1] == "\n" or value[-1] == "\r": value = value[:-1] dic[key] = value input_file.close() - output_file = open(output_filename, "w") + output_file = open(output_filename, "wb") - for key in sorted(dic.iterkeys()): - output_file.write(key + "\t" + dic[key] + "\n") + for key in sorted(dic.keys()): + line = key + "\t" + dic[key] + "\n" + output_file.write(line.encode('utf-8')) output_file.close() def reverse_items(input_filename, output_filename): - input_file = open(input_filename, "r") + input_file = codecs.open(input_filename, "r", encoding="utf-8") dic = {} for line in input_file: @@ -37,22 +40,23 @@ def reverse_items(input_filename, output_filename): value_list = value.split(" ") for value in value_list: - if dic.has_key(value): + if value in dic: dic[value].append(key) else: dic[value] = [key] input_file.close() - output_file = open(output_filename, "w") + output_file = open(output_filename, "wb") - for key in sorted(dic.iterkeys()): - output_file.write(key + "\t" + " ".join(dic[key]) + "\n") + for key in sorted(dic.keys()): + line = key + "\t" + " ".join(dic[key]) + "\n" + output_file.write(line.encode('utf-8')) output_file.close() def find_target_items(input_filename, keyword): - 
input_file = open(input_filename, "r") + input_file = codecs.open(input_filename, "r", encoding="utf-8") for line in input_file: if len(line) == 0: continue @@ -63,6 +67,6 @@ def find_target_items(input_filename, keyword): value_list = value.split(" ") for value in value_list: if keyword in value: - print line, + sys.stdout.write(line) input_file.close() diff --git a/data/scripts/common.pyc b/data/scripts/common.pyc deleted file mode 100644 index d4df2219ba57b6659bfad17796b77f2a29053920..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2002 zcmb`I-EJFI5Xa}N?ezz7423o|MIxnKSO|4*P?1m+6*Z^`p-7xc7@4H7TG9Y}o)9)t&g|IE5^5}8$;fNOU*b|SI9;HOf z3eA6{g-3e_-_x>6PSK)DPsHTNBl8hW-=du8q|5t5@qp8Zc)mtUpZfVVs=5k|T4Y9~ zl!Dg%Pjf|I&2Q7fr?^VpxJEE#e>h$Hyf#0t*4r>MEvOIeMTHhM+JVbHLK-3vBE>-? zo;q>Rb(()juEv@}pN|pD-oY)7W>KeMou5Y%wn|IgE%@GoikkiblTav4B7+ZloSl8# zm&hOy$PiDP>0kKBv?a1F8popTIJ3nh$#G$#48JvgZjzlqO7oq z;{sUhB(Y+U|E_c zvH|mA{fNVHubj8i!;(bUk1-2o)&yA*g;{V|6o*+7JTAsb5QXNOVl*p)I5`UV#!(c+ zp$h{WnRMc85EbKbk>8%2$i?}716*ZnIWhJf_Mtk;_uA@;@>NB3Ra-UHEw!mOy_VPU zy6UDUh&G{lisD9j7Ht+xwq};~s}RDAkU7Xw>-1~byL2YmuSt#) zW0Mlg8`!Y#LY&3&Dg)_tfM!R%;ay#U`CbVoUz6YNdl>dP#D(5D|D{CoCvKtKH7kq& z8Zo!F4`_OgyCzrmY|E^2Z5c^4sdZz_PS<5)JXx)`p*;N@NY=zkiKI{Sn~dPzL03S5 zXS7$J%v1JufBKa0?ZdrvPy~f53mIIpmy@AihnoC@Twp?q-;?5Vkp&Upaa@?Vto8Xa z>n*l}ajCtAVR@A1@xX=VFj=*nJJ__>*%j(oHk@^3H&n;l?+MvgprKh$zX`F9O=g$W X3^~a=<077A$u2v!{5mzSH*fq6%tM4L diff --git a/data/scripts/find_target.py b/data/scripts/find_target.py index 09f23de..4953bbb 100755 --- a/data/scripts/find_target.py +++ b/data/scripts/find_target.py @@ -4,8 +4,8 @@ import sys from common import find_target_items if len(sys.argv) != 3: - print "Find the value keyword in all pairs" - print "Usage: ", sys.argv[0], "[input] [keyword]" + print("Find the value keyword in all pairs") + print("Usage: ", sys.argv[0], "[input] [keyword]") exit(1) find_target_items(sys.argv[1], sys.argv[2]) diff --git a/data/scripts/merge.py b/data/scripts/merge.py 
index 09f97b7..fcbe588 100755 --- a/data/scripts/merge.py +++ b/data/scripts/merge.py @@ -1,25 +1,26 @@ #!/usr/bin/env python #coding: utf-8 +import codecs import sys from common import sort_items if len(sys.argv) < 4: - print "Merge and sort all text dictionaries" - print "Usage: ", sys.argv[0], "[input1] [input2] … [inputN] [output]" + print("Merge and sort all text dictionaries") + print("Usage: ", sys.argv[0], "[input1] [input2] ... [inputN] [output]") exit(1) all_lines = [] for i in range(1, len(sys.argv) - 1): - input_file = open(sys.argv[i], "r") + input_file = codecs.open(sys.argv[i], "r", encoding="utf-8") for line in input_file: all_lines += line input_file.close() all_lines += '\n' output_filename = sys.argv[-1] -output_file = open(output_filename, "w") +output_file = open(output_filename, "wb") for line in all_lines: - output_file.write(line) + output_file.write(line.encode('utf-8')) output_file.close() sort_items(output_filename, output_filename) diff --git a/data/scripts/reverse.py b/data/scripts/reverse.py index bd597be..2d2d77f 100755 --- a/data/scripts/reverse.py +++ b/data/scripts/reverse.py @@ -4,8 +4,8 @@ import sys from common import reverse_items if len(sys.argv) != 3: - print "Reverse key and value of all pairs" - print "Usage: ", sys.argv[0], "[input] [output]" + print("Reverse key and value of all pairs") + print("Usage: ", sys.argv[0], "[input] [output]") exit(1) reverse_items(sys.argv[1], sys.argv[2]) diff --git a/data/scripts/sort.py b/data/scripts/sort.py index 1d910d2..fb5f5ad 100755 --- a/data/scripts/sort.py +++ b/data/scripts/sort.py @@ -3,9 +3,16 @@ import sys from common import sort_items -if len(sys.argv) != 3: - print "Sort the dictionary" - print "Usage: ", sys.argv[0], "[input] [output]" +if len(sys.argv) < 2: + print("Sort the dictionary") + print("Usage: ", sys.argv[0], "[input] ([output])") exit(1) -sort_items(sys.argv[1], sys.argv[2]) +input = sys.argv[1] + +if len(sys.argv) < 3: + output = input +else: + output = 
sys.argv[2] + +sort_items(input, output) diff --git a/data/scripts/sort_all.py b/data/scripts/sort_all.py new file mode 100755 index 0000000..0f6614a --- /dev/null +++ b/data/scripts/sort_all.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +#coding: utf-8 +import glob +import sys +from common import sort_items + +if len(sys.argv) < 2: + print("Sort the dictionary") + print("Usage: ", sys.argv[0], "[directory]") + exit(1) + +dirtectory = sys.argv[1] +files = glob.glob(dirtectory + "/*") +for filename in files: + print(filename) + sort_items(filename, filename) diff --git a/data/tw/from_tw_variants.txt b/data/tw/from_tw_variants.txt deleted file mode 100644 index 895d089..0000000 --- a/data/tw/from_tw_variants.txt +++ /dev/null @@ -1,22 +0,0 @@ -偽 僞 -啟 啓 -囪 囱 -媯 嬀 -床 牀 -汙 污 -洩 泄 -湧 涌 -溈 潙 -灶 竈 -為 爲 -眾 衆 -稜 棱 -群 羣 -裡 裏 -踴 踊 -雞 鷄 -麴 麯 -麵 麪 -著 着 -凶 兇 -峰 峯 diff --git a/data/tw/merge.sh b/data/tw/merge.sh deleted file mode 100755 index 5e4bbf2..0000000 --- a/data/tw/merge.sh +++ /dev/null @@ -1 +0,0 @@ -python ../scripts/merge.py tw-it.txt tw-other.txt tw-name.txt to_tw_phrases.txt diff --git a/data/tw/to_tw_phrases.txt b/data/tw/to_tw_phrases.txt deleted file mode 100644 index 0552e7c..0000000 --- a/data/tw/to_tw_phrases.txt +++ /dev/null @@ -1,410 +0,0 @@ -PN結 PN接面 -SQL注入 SQL隱碼攻擊 -SQL注入攻擊 SQL隱碼攻擊 -三極管 三極體 -下拉列表 下拉選單 -並行計算 平行計算 -中間件 中介軟體 -串口 串列埠 -串行 序列 -串行端口 串列埠 -主引導記錄 主開機記錄 -主板 主機板 -乍得 查德 -也門 葉門 -二極管 二極體 -互聯網 網際網路 -交互 互動 -交互式 互動式 -人工智能 人工智慧 -仙童半導體 快捷半導體 -代碼 程式碼 代碼 -代碼頁 內碼表 -以太網 乙太網 -任務欄 工作列 -任務管理器 工作管理員 -仿真 模擬 -伯利茲 貝里斯 -位圖 點陣圖 -低級 低階 低級 -佛得角 維德角 -便攜式 行動式 攜帶型 -保存 儲存 -信噪比 訊雜比 -信息 資訊 -信息安全 資訊保安 -信息技術 資訊科技 -信息論 資訊理論 -信號 訊號 信號 -信道 通道 -傅里葉 傅立葉 -傳感 感測 -像素 畫素 -僞代碼 虛擬碼 -優先級 優先順序 -元數據 後設資料 -元編程 超程式設計 -元音 母音 -光標 游標 -光盤 光碟 -光驅 光碟機 -克羅地亞 克羅埃西亞 -內核 核心 -內置 內建 -內聯函數 行內函數 -全角 全形 -兼容 相容 -冒泡排序 氣泡跑需 -凉菜 冷盤 -出租车 計程車 -函數式編程 函數語言程式設計 -刀片服務器 刀鋒伺服器 -分佈式 分散式 -分辨率 解析度 -列支敦士登 列支敦斯登 -利比里亞 賴比瑞亞 -刷新 重新整理 -刻錄 燒錄 -前綴 字首 -剪切 剪下 -剪貼板 剪貼簿 -加納 迦納 -加蓬 加彭 -加載 載入 -半角 半形 -博客 部落格 -博茨瓦納 波札那 -卡塔爾 卡達 -危地馬拉 瓜地馬拉 -卸載 
解除安裝 -厄瓜多爾 厄瓜多 -厄立特里亞 厄利垂亞 -參數表 參數列 -句柄 控制代碼 -可視化 視覺化 -吉布堤 吉布地 -呼叫轉移 來電轉駁 -命令式編程 指令式程式設計 -命令行 命令列 -命名空間 名稱空間 -哈希 雜湊 -哈萨克斯坦 哈薩克 -哥斯達黎加 哥斯大黎加 -單片機 微控制器 -回調 回撥 -圖像 影象 -圖標 圖示 -圖瓦盧 吐瓦魯 -土庫曼斯坦 土庫曼 -在線 線上 -圭亞那 蓋亞那 -地址 地址 位址 -地址欄 位址列 -坦桑尼亞 坦尚尼亞 -埃塞俄比亞 衣索比亞 -城域王 都會網路 -基里巴斯 吉里巴斯 -塔吉克斯坦 塔吉克 -塞拉利昂 獅子山 -塞浦路斯 塞普勒斯 -塞舌爾 塞席爾 -壁紙 桌布 壁紙 -外置 外接 -外鍵 外來鍵 -多任務 多工 -多態 多型 -多米尼加 多明尼加 -多線程 多執行緒 -奔馳 賓士 -奶酪 乳酪 -字庫 字型檔 -字段 欄位 -字符 字元 -字符集 字符集 -字節 位元組 -字體 字型 -存盤 存檔 -安提瓜和巴布達 安地卡及巴布達 -宏內核 單核心 -密鑰 金鑰 -實例 例項 實例 -實模式 真實模式 -審覈 稽覈 -寫保護 防寫 -寬帶 寬頻 -尋址 定址 -對話框 對話方塊 -對象 物件 對象 -導入 匯入 -導出 匯出 -尼日利亞 奈及利亞 -尼日爾 尼日 -局域網 區域網 -屏幕 螢幕 -屏蔽 遮蔽 -岡比亞 甘比亞 -嵌套 巢狀 -巴巴多斯 巴貝多 -巴布亞新幾内亚 巴布亞紐幾內亞 -布基納法索 布吉納法索 -布隆迪 蒲隆地 -帕劳 帛琉 -帶寬 頻寬 -幾內亞比紹 幾內亞比索 -引導程序 載入程式 -彙編 彙編 組譯 -彙編語言 組合語言 -後綴 字尾 -循環 迴圈 循環 -性價比 價效比 -性能 效能 -意大利 義大利 -截取 擷取 -所羅門羣島 索羅門羣島 -打印 列印 -打印机 印表機 -打開 開啟 打開 -持久性 永續性 -捲積 摺積 -掃描儀 掃描器 -採樣 取樣 -接口 介面 -控件 控制項 -操作系統 作業系統 -擴展名 副檔名 -支持 支援 -支持者 支持者 -散列 雜湊 -數字 數字 數位 -數字印刷 數位印刷 -數字電子 數位電子 -數字電路 數位電路 -數據 資料 -數據倉庫 資料倉儲 -數據報 資料包 -數據庫 資料庫 -數據挖掘 資料探勘 -數據源 資料來源 -數組 陣列 -文件 檔案 -文件名 檔名 -文件夾 資料夾 -文件擴展名 副檔名 -文字處理 文書處理 -文本 文字 -文檔 文件 -文莱 汶萊 -斯威士蘭 史瓦濟蘭 -斯洛文尼亞 斯洛維尼亞 -新西蘭 紐西蘭 -方便麵 速食麵 -映射 對映 -時分多址 分時多重進接 -時分複用 分時多工 -時鐘頻率 時脈頻率 -晶閘管 閘流體 -智能 智慧 -最終用戶 終端使用者 -有損壓縮 有失真壓縮 -服務器 伺服器 -本地代碼 原生代碼 -析構函數 解構函式 -枚舉 列舉 -查看 檢視 -格林納達 格瑞那達 -格魯吉亞 喬治亞 -桌面型 桌上型 -構造函數 建構函式 -模塊 模組 -模擬 模擬 類比 -模擬電子 類比電子 -模擬電路 類比電路 -權限 許可權 -歐拉 尤拉 -正則表達式 正規表示式 -死機 宕機 -殺毒 防毒 -比特 位元 -比特率 位元率 -毛里塔尼亞 茅利塔尼亞 -毛里求斯 模里西斯 -沙特阿拉伯 沙烏地阿拉伯 -波分複用 波長分波多工 -波斯尼亞黑塞哥維那 波士尼亞赫塞哥維納 -津巴布韋 辛巴威 -洪都拉斯 宏都拉斯 -消息 訊息 消息 -源代碼 原始碼 -源文件 原始檔 -溢出 溢位 -溫納圖萬 那杜 -演示文稿 簡報 -激光 鐳射 -激活 啟用 -烏茲別克斯坦 烏茲別克 -無損壓縮 無失真壓縮 -物理内存 實體記憶體 -物理地址 實體地址 -特立尼達和多巴哥 千里達及托巴哥 -狀態欄 狀態列 -瑙魯 諾魯 -瓦努阿圖 萬那杜 -用戶 使用者 -界面 介面 -異步 非同步 -登錄 登入 -發佈 釋出 -皮膚 面板 -盤片 碟片 -盤符 碟符 -盧旺達 盧安達 -目標代碼 目的碼 -相冊 相簿 -矢量 向量 -知識產權 智慧財產權 -短信 簡訊 -硅 矽 -硬件 硬體 -硬盤 硬碟 -碼分多址 分碼多重進接 -碼率 位元速率 -磁盤 磁碟 -磁道 磁軌 -社區 社羣 社區 -科摩羅 葛摩 -科特迪瓦 象牙海岸 -移動硬盤 行動硬碟 -移動通信 行動通訊 -移動電話 行動電話 -程序 程式 -程序員 程式設計師 -空分多址 分空間多重進接 -空分複用 空間多工 -突尼斯 突尼西亞 -窗口 視窗 -端口 埠 -筆記本電腦 膝上型電腦 -算子 運算元 -算法 演算法 -範式 正規化 -粘貼 貼上 粘貼 -紅心大戰 傷心小棧 -索馬里 索馬利亞 -組件 元件 
-綁定 繫結 -網上鄰居 網路上的芳鄰 -網吧 網咖 -網絡 網路 -網關 閘道器 -線程 執行緒 -編程 程式設計 -編程語言 程式語言 -緩存 快取 -縮略圖 縮圖 -縮進 縮排 -總線 匯流排 -缺省 預設 -老撾 寮國 -聖基茨和尼維斯 聖克里斯多福及尼維斯 -聖文森特和格林納丁斯 聖文森及格瑞那丁 -聖盧西亞 聖露西亞 -聖馬力諾 聖馬利諾 -聲卡 音效卡 -肯尼亞 肯亞 -脫機 離線 -腳本 指令碼 -臺式機 桌上型電腦 -航天飛機 太空梭 -芯片 晶片 -莫桑比克 莫三比克 -菜單 選單 菜單 -萊索托 賴索托 -萬維網 全球資訊網 -藍牙 藍芽 -蘇裏南 蘇利南 -虛函數 虛擬函式 -虛擬機 虛擬機器 -表達式 表示式 -複印 影印 -複選按鈕 覈取按鈕 -複選框 覈取方塊 -視圖 檢視 -視頻 視訊 -解釋器 直譯器 -觸摸屏 觸控式螢幕 -計算機安全 電腦保安 -計算機科學 電腦科學 -訪問 訪問 存取 -設備 裝置 -設置 設定 -註冊機 序號產生器 -註銷 登出 -詞組 片語 -調制 調變 -調度 排程 -調用 呼叫 -調色板 調色盤 -調製解調器 數據機 -調試 偵錯 -調試器 偵錯程式 -變量 變數 -貝寧 貝南 -贊比亞 尚比亞 -蹦极 笨豬跳 -軟件 軟體 -軟驅 軟碟機 -輔音 子音 -通信 通訊 -連接 連線 -進制 進位制 -進程 程序 進程 -過程式編程 程序式程式設計 -遞歸 遞迴 -遠程 遠端 -適配器 介面卡 -邏輯門 邏輯閘 -采样率 取樣率 -重命名 重新命名 -重裝 重灌 -重載 過載 -金屬氧化物半導體 金氧半導體 -錄像 錄影 -鏈接 連結 -鏈表 連結串列 -鏡像 映象 -門戶網站 入口網站 -門電路 閘電路 -閃存 快閃記憶體 -關係數據庫 關聯式資料庫 -阿塞拜疆 亞塞拜然 -阿拉伯聯合酋長國 阿拉伯聯合大公國 -隊列 佇列 -集成 整合 -集成電路 積體電路 -雲存儲 雲端儲存 -雲計算 雲端計算 -面向對象 物件導向 -面向過程 程序導向 -音頻 音訊 -頁眉 頁首 -頁腳 頁尾 -頭文件 標頭檔案 -頻分多址 分頻多重進接 -頻分複用 分頻多工 -類模板 類别範本 -顯像管 映象管 -顯卡 顯示卡 -顯存 視訊記憶體 -首席信息官 資訊長 -首席執行官 執行長 -首席技術官 技術長 -首席運營官 營運長 -香農 夏農 -馬爾代夫 馬爾地夫 -馬里共和國 馬利共和國 -高端 高階 進階 -高級 高階 進階 高級 -高速緩存 快取記憶體 -默認值 預設值 -點擊 點選 -鼠標 滑鼠 diff --git a/data/tw/tw-other.txt b/data/tw/tw-other.txt deleted file mode 100644 index d5359a3..0000000 --- a/data/tw/tw-other.txt +++ /dev/null @@ -1,10 +0,0 @@ -硅 矽 -詞組 片語 -奶酪 乳酪 -元音 母音 -輔音 子音 -方便麵 速食麵 -蹦极 笨豬跳 -凉菜 冷盤 -出租车 計程車 -奔馳 賓士 \ No newline at end of file diff --git a/debug.sh b/debug.sh deleted file mode 100755 index 1316a17..0000000 --- a/debug.sh +++ /dev/null @@ -1,11 +0,0 @@ -mkdir -p debug \ -&& cd debug \ -&& cmake \ - -D ENABLE_GETTEXT:BOOL=OFF \ - -D BUILD_DOCUMENTATION:BOOL=ON \ - -DCMAKE_BUILD_TYPE=Debug \ - -DCMAKE_INSTALL_PREFIX=`pwd`/root \ - .. 
\ -&& make \ -&& make install \ -&& make test diff --git a/deps/darts-clone/darts.h b/deps/darts-clone/darts.h new file mode 100644 index 0000000..d47b0e3 --- /dev/null +++ b/deps/darts-clone/darts.h @@ -0,0 +1,1898 @@ +#ifndef DARTS_H_ +#define DARTS_H_ + +#include +#include +#include + +#define DARTS_VERSION "0.32" + +// DARTS_THROW() throws a whose message starts with the +// file name and the line number. For example, DARTS_THROW("error message") at +// line 123 of "darts.h" throws a which has a pointer to +// "darts.h:123: exception: error message". The message is available by using +// what() as well as that of . +#define DARTS_INT_TO_STR(value) #value +#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line) +#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__) +#define DARTS_THROW(msg) throw Darts::Details::Exception( \ + __FILE__ ":" DARTS_LINE_STR ": exception: " msg) + +namespace Darts { + +// The following namespace hides the internal types and classes. +namespace Details { + +// This header assumes that and are 32-bit integer types. +// +// Darts-clone keeps values associated with keys. The type of the values is +// . Note that the values must be positive integers because the +// most significant bit (MSB) of each value is used to represent whether the +// corresponding unit is a leaf or not. Also, the keys are represented by +// sequences of s. is the unsigned type of . +typedef char char_type; +typedef unsigned char uchar_type; +typedef int value_type; + +// The main structure of Darts-clone is an array of s, and the +// unit type is actually a wrapper of . +typedef size_t id_type; + +// is the type of callback functions for reporting the +// progress of building a dictionary. See also build() of . +// The 1st argument receives the progress value and the 2nd argument receives +// the maximum progress value. A usage example is to show the progress +// percentage, 100.0 * (the 1st argument) / (the 2nd argument). 
+typedef int (*progress_func_type)(std::size_t, std::size_t); + +// <DoubleArrayUnit> is the type of double-array units and it is a wrapper of +// <id_type> in practice. +class DoubleArrayUnit { + public: + DoubleArrayUnit() : unit_() {} + + // has_leaf() returns whether a leaf unit is immediately derived from the + // unit (true) or not (false). + bool has_leaf() const { + return ((unit_ >> 8) & 1) == 1; + } + // value() returns the value stored in the unit, and thus value() is + // available when and only when the unit is a leaf unit. + value_type value() const { + return static_cast<value_type>(unit_ & ((1U << 31) - 1)); + } + + // label() returns the label associated with the unit. Note that a leaf unit + // always returns an invalid label. For this feature, leaf unit's label() + // returns an <id_type> that has the MSB of 1. + id_type label() const { + return unit_ & ((1U << 31) | 0xFF); + } + // offset() returns the offset from the unit to its derived units. + id_type offset() const { + return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6); + } + + private: + id_type unit_; + + // Copyable. +}; + +// Darts-clone throws an <Exception> for memory allocation failure, invalid +// arguments or a too large offset. The last case means that there are too many +// keys in the given set of keys. Note that the `msg' of <Exception> must be a +// constant or static string because an <Exception> keeps only a pointer to +// that string. +class Exception : public std::exception { + public: + explicit Exception(const char *msg = NULL) throw() : msg_(msg) {} + Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {} + virtual ~Exception() throw() {} + + // <Exception> overrides what() of <std::exception>. + virtual const char *what() const throw() { + return (msg_ != NULL) ? msg_ : ""; + } + + private: + const char *msg_; + + // Disallows operator=. + Exception &operator=(const Exception &); +}; + +} // namespace Details + +// <DoubleArrayImpl> is the interface of Darts-clone. Note that other +// classes should not be accessed from outside.
+// +// <DoubleArrayImpl> has 4 template arguments but only the 3rd one is used as +// the type of values. Note that the given <T> is used only from outside, and +// the internal value type is not changed from <Darts::Details::value_type>. +// In build(), given values are cast from <T> to <Darts::Details::value_type> +// by using static_cast. On the other hand, values are cast from +// <Darts::Details::value_type> to <T> in searching dictionaries. +template <typename, typename, typename T, typename> +class DoubleArrayImpl { + public: + // Even if this <value_type> is changed, the internal value type is still + // <Darts::Details::value_type>. Other types, such as 64-bit integer types + // and floating-point number types, should not be used. + typedef T value_type; + // A key is represented by a sequence of <key_type>s. For example, + // exactMatchSearch() takes a <const key_type *>. + typedef Details::char_type key_type; + // In searching dictionaries, the values associated with the matched keys are + // stored into or returned as <result_type>s. + typedef value_type result_type; + + // <result_pair_type> enables applications to get the lengths of the matched + // keys in addition to the values. + struct result_pair_type { + value_type value; + std::size_t length; + }; + + // The constructor initializes member variables with 0 and NULLs. + DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {} + // The destructor frees memory allocated for units and then initializes + // member variables with 0 and NULLs. + virtual ~DoubleArrayImpl() { + clear(); + } + + // <DoubleArrayImpl> has 2 kinds of set_result()s. The 1st set_result() is to + // set a value to a <value_type>. The 2nd set_result() is to set a value and + // a length to a <result_pair_type>. By using set_result()s, search methods + // can return the 2 kinds of results in the same way. + // Why are the set_result()s non-static? It is for compatibility. + // + // The 1st set_result() takes a length as the 3rd argument but it is not + // used. If a compiler does a good job, the code for getting the length may be + // removed. + void set_result(value_type *result, value_type value, std::size_t) const { + *result = value; + } + // The 2nd set_result() uses both `value' and `length'.
+ void set_result(result_pair_type *result, + value_type value, std::size_t length) const { + result->value = value; + result->length = length; + } + + // set_array() calls clear() in order to free memory allocated to the old + // array and then sets a new array. This function is useful to set a memory- + // mapped array. Note that the array set by set_array() is not freed in + // clear() and the destructor of . + // set_array() can also set the size of the new array but the size is not + // used in search methods. So it works well even if the 2nd argument is 0 or + // omitted. Remember that size() and total_size() returns 0 in such a case. + void set_array(const void *ptr, std::size_t size = 0) { + clear(); + array_ = static_cast(ptr); + size_ = size; + } + // array() returns a pointer to the array of units. + const void *array() const { + return array_; + } + + // clear() frees memory allocated to units and then initializes member + // variables with 0 and NULLs. Note that clear() does not free memory if the + // array of units was set by set_array(). In such a case, `array_' is not + // NULL and `buf_' is NULL. + void clear() { + size_ = 0; + array_ = NULL; + if (buf_ != NULL) { + delete[] buf_; + buf_ = NULL; + } + } + + // unit_size() returns the size of each unit. The size must be 4 bytes. + std::size_t unit_size() const { + return sizeof(unit_type); + } + // size() returns the number of units. It can be 0 if set_array() is used. + std::size_t size() const { + return size_; + } + // total_size() returns the number of bytes allocated to the array of units. + // It can be 0 if set_array() is used. + std::size_t total_size() const { + return unit_size() * size(); + } + // nonzero_size() exists for compatibility. It always returns the number of + // units because it takes long time to count the number of non-zero units. + std::size_t nonzero_size() const { + return size(); + } + + // build() constructs a dictionary from given key-value pairs. 
If `lengths' + // is NULL, `keys' is handled as an array of zero-terminated strings. If + // `values' is NULL, the index in `keys' is associated with each key, i.e. + // the ith key has (i - 1) as its value. + // Note that the key-value pairs must be arranged in key order and the values + // must not be negative. Also, if there are duplicate keys, only the first + // pair will be stored in the resultant dictionary. + // `progress_func' is a pointer to a callback function. If it is not NULL, + // it will be called in build() so that the caller can check the progress of + // dictionary construction. For details, please see the definition of + // . + // The return value of build() is 0, and it indicates the success of the + // operation. Otherwise, build() throws a , which is a + // derived class of . + // build() uses another construction algorithm if `values' is not NULL. In + // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead + // of a trie because a DAWG is likely to be more compact than a trie. + int build(std::size_t num_keys, const key_type * const *keys, + const std::size_t *lengths = NULL, const value_type *values = NULL, + Details::progress_func_type progress_func = NULL); + + // open() reads an array of units from the specified file. And if it goes + // well, the old array will be freed and replaced with the new array read + // from the file. `offset' specifies the number of bytes to be skipped before + // reading an array. `size' specifies the number of bytes to be read from the + // file. If the `size' is 0, the whole file will be read. + // open() returns 0 iff the operation succeeds. Otherwise, it returns a + // non-zero value or throws a . The exception is thrown + // when and only when a memory allocation fails. + int open(const char *file_name, const char *mode = "rb", + std::size_t offset = 0, std::size_t size = 0); + // save() writes the array of units into the specified file. 
`offset' + // specifies the number of bytes to be skipped before writing the array. + // open() returns 0 iff the operation succeeds. Otherwise, it returns a + // non-zero value. + int save(const char *file_name, const char *mode = "wb", + std::size_t offset = 0) const; + + // The 1st exactMatchSearch() tests whether the given key exists or not, and + // if it exists, its value and length are set to `result'. Otherwise, the + // value and the length of `result' are set to -1 and 0 respectively. + // Note that if `length' is 0, `key' is handled as a zero-terminated string. + // `node_pos' specifies the start position of matching. This argument enables + // the combination of exactMatchSearch() and traverse(). For example, if you + // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get + // the node position corresponding to "xyz" and then you can use + // exactMatchSearch() to test "A", "BC", and "DE" from that position. + // Note that the length of `result' indicates the length from the `node_pos'. + // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }. + template + void exactMatchSearch(const key_type *key, U &result, + std::size_t length = 0, std::size_t node_pos = 0) const { + result = exactMatchSearch(key, length, node_pos); + } + // The 2nd exactMatchSearch() returns a result instead of updating the 2nd + // argument. So, the following exactMatchSearch() has only 3 arguments. + template + inline U exactMatchSearch(const key_type *key, std::size_t length = 0, + std::size_t node_pos = 0) const; + + // commonPrefixSearch() searches for keys which match a prefix of the given + // string. If `length' is 0, `key' is handled as a zero-terminated string. + // The values and the lengths of at most `max_num_results' matched keys are + // stored in `results'. commonPrefixSearch() returns the number of matched + // keys. 
Note that the return value can be larger than `max_num_results' if + // there are more than `max_num_results' matches. If you want to get all the + // results, allocate more spaces and call commonPrefixSearch() again. + // `node_pos' works as well as in exactMatchSearch(). + template + inline std::size_t commonPrefixSearch(const key_type *key, U *results, + std::size_t max_num_results, std::size_t length = 0, + std::size_t node_pos = 0) const; + + // In Darts-clone, a dictionary is a deterministic finite-state automaton + // (DFA) and traverse() tests transitions on the DFA. The initial state is + // `node_pos' and traverse() chooses transitions labeled key[key_pos], + // key[key_pos + 1], ... in order. If there is not a transition labeled + // key[key_pos + i], traverse() terminates the transitions at that state and + // returns -2. Otherwise, traverse() ends without a termination and returns + // -1 or a nonnegative value, -1 indicates that the final state was not an + // accept state. When a nonnegative value is returned, it is the value + // associated with the final accept state. That is, traverse() returns the + // value associated with the given key if it exists. Note that traverse() + // updates `node_pos' and `key_pos' after each transition. + inline value_type traverse(const key_type *key, std::size_t &node_pos, + std::size_t &key_pos, std::size_t length = 0) const; + + private: + typedef Details::uchar_type uchar_type; + typedef Details::id_type id_type; + typedef Details::DoubleArrayUnit unit_type; + + std::size_t size_; + const unit_type *array_; + unit_type *buf_; + + // Disallows copy and assignment. + DoubleArrayImpl(const DoubleArrayImpl &); + DoubleArrayImpl &operator=(const DoubleArrayImpl &); +}; + +// is the typical instance of . It uses +// as the type of values and it is suitable for most cases. +typedef DoubleArrayImpl DoubleArray; + +// The interface section ends here. 
For using Darts-clone, there is no need +// to read the remaining section, which gives the implementation of +// Darts-clone. + +// +// Member functions of DoubleArrayImpl (except build()). +// + +template +int DoubleArrayImpl::open(const char *file_name, + const char *mode, std::size_t offset, std::size_t size) { +#ifdef _MSC_VER + std::FILE *file; + if (::fopen_s(&file, file_name, mode) != 0) { + return -1; + } +#else + std::FILE *file = std::fopen(file_name, mode); + if (file == NULL) { + return -1; + } +#endif + + if (size == 0) { + if (std::fseek(file, 0, SEEK_END) != 0) { + std::fclose(file); + return -1; + } + size = std::ftell(file) - offset; + } + + if (std::fseek(file, offset, SEEK_SET) != 0) { + std::fclose(file); + return -1; + } + + size /= unit_size(); + unit_type *buf; + try { + buf = new unit_type[size]; + } catch (const std::bad_alloc &) { + std::fclose(file); + DARTS_THROW("failed to open double-array: std::bad_alloc"); + } + + if (std::fread(buf, unit_size(), size, file) != size) { + std::fclose(file); + delete[] buf; + return -1; + } + std::fclose(file); + + clear(); + + size_ = size; + array_ = buf; + buf_ = buf; + return 0; +} + +template +int DoubleArrayImpl::save(const char *file_name, + const char *mode, std::size_t) const { + if (size() == 0) { + return -1; + } + +#ifdef _MSC_VER + std::FILE *file; + if (::fopen_s(&file, file_name, mode) != 0) { + return -1; + } +#else + std::FILE *file = std::fopen(file_name, mode); + if (file == NULL) { + return -1; + } +#endif + + if (std::fwrite(array_, unit_size(), size(), file) != size()) { + std::fclose(file); + return -1; + } + std::fclose(file); + return 0; +} + +template +template +inline U DoubleArrayImpl::exactMatchSearch(const key_type *key, + std::size_t length, std::size_t node_pos) const { + U result; + set_result(&result, static_cast(-1), 0); + + unit_type unit = array_[node_pos]; + if (length != 0) { + for (std::size_t i = 0; i < length; ++i) { + node_pos ^= unit.offset() ^ 
static_cast(key[i]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[i])) { + return result; + } + } + } else { + for ( ; key[length] != '\0'; ++length) { + node_pos ^= unit.offset() ^ static_cast(key[length]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[length])) { + return result; + } + } + } + + if (!unit.has_leaf()) { + return result; + } + unit = array_[node_pos ^ unit.offset()]; + set_result(&result, static_cast(unit.value()), length); + return result; +} + +template +template +inline std::size_t DoubleArrayImpl::commonPrefixSearch( + const key_type *key, U *results, std::size_t max_num_results, + std::size_t length, std::size_t node_pos) const { + std::size_t num_results = 0; + + unit_type unit = array_[node_pos]; + node_pos ^= unit.offset(); + if (length != 0) { + for (std::size_t i = 0; i < length; ++i) { + node_pos ^= static_cast(key[i]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[i])) { + return num_results; + } + + node_pos ^= unit.offset(); + if (unit.has_leaf()) { + if (num_results < max_num_results) { + set_result(&results[num_results], static_cast( + array_[node_pos].value()), i + 1); + } + ++num_results; + } + } + } else { + for ( ; key[length] != '\0'; ++length) { + node_pos ^= static_cast(key[length]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[length])) { + return num_results; + } + + node_pos ^= unit.offset(); + if (unit.has_leaf()) { + if (num_results < max_num_results) { + set_result(&results[num_results], static_cast( + array_[node_pos].value()), length + 1); + } + ++num_results; + } + } + } + + return num_results; +} + +template +inline typename DoubleArrayImpl::value_type +DoubleArrayImpl::traverse(const key_type *key, + std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const { + id_type id = static_cast(node_pos); + unit_type unit = array_[id]; + + if (length != 0) { + for ( ; key_pos < length; ++key_pos) { + id ^= unit.offset() ^ 
static_cast(key[key_pos]); + unit = array_[id]; + if (unit.label() != static_cast(key[key_pos])) { + return static_cast(-2); + } + node_pos = id; + } + } else { + for ( ; key[key_pos] != '\0'; ++key_pos) { + id ^= unit.offset() ^ static_cast(key[key_pos]); + unit = array_[id]; + if (unit.label() != static_cast(key[key_pos])) { + return static_cast(-2); + } + node_pos = id; + } + } + + if (!unit.has_leaf()) { + return static_cast(-1); + } + unit = array_[id ^ unit.offset()]; + return static_cast(unit.value()); +} + +namespace Details { + +// +// Memory management of array. +// + +template +class AutoArray { + public: + explicit AutoArray(T *array = NULL) : array_(array) {} + ~AutoArray() { + clear(); + } + + const T &operator[](std::size_t id) const { + return array_[id]; + } + T &operator[](std::size_t id) { + return array_[id]; + } + + bool empty() const { + return array_ == NULL; + } + + void clear() { + if (array_ != NULL) { + delete[] array_; + array_ = NULL; + } + } + void swap(AutoArray *array) { + T *temp = array_; + array_ = array->array_; + array->array_ = temp; + } + void reset(T *array = NULL) { + AutoArray(array).swap(this); + } + + private: + T *array_; + + // Disallows copy and assignment. + AutoArray(const AutoArray &); + AutoArray &operator=(const AutoArray &); +}; + +// +// Memory management of resizable array. 
+// + +template +class AutoPool { + public: + AutoPool() : buf_(), size_(0), capacity_(0) {} + ~AutoPool() { clear(); } + + const T &operator[](std::size_t id) const { + return *(reinterpret_cast(&buf_[0]) + id); + } + T &operator[](std::size_t id) { + return *(reinterpret_cast(&buf_[0]) + id); + } + + bool empty() const { + return size_ == 0; + } + std::size_t size() const { + return size_; + } + + void clear() { + resize(0); + buf_.clear(); + size_ = 0; + capacity_ = 0; + } + + void push_back(const T &value) { + append(value); + } + void pop_back() { + (*this)[--size_].~T(); + } + + void append() { + if (size_ == capacity_) + resize_buf(size_ + 1); + new(&(*this)[size_++]) T; + } + void append(const T &value) { + if (size_ == capacity_) + resize_buf(size_ + 1); + new(&(*this)[size_++]) T(value); + } + + void resize(std::size_t size) { + while (size_ > size) { + (*this)[--size_].~T(); + } + if (size > capacity_) { + resize_buf(size); + } + while (size_ < size) { + new(&(*this)[size_++]) T; + } + } + void resize(std::size_t size, const T &value) { + while (size_ > size) { + (*this)[--size_].~T(); + } + if (size > capacity_) { + resize_buf(size); + } + while (size_ < size) { + new(&(*this)[size_++]) T(value); + } + } + + void reserve(std::size_t size) { + if (size > capacity_) { + resize_buf(size); + } + } + + private: + AutoArray buf_; + std::size_t size_; + std::size_t capacity_; + + // Disallows copy and assignment. 
+ AutoPool(const AutoPool &); + AutoPool &operator=(const AutoPool &); + + void resize_buf(std::size_t size); +}; + +template +void AutoPool::resize_buf(std::size_t size) { + std::size_t capacity; + if (size >= capacity_ * 2) { + capacity = size; + } else { + capacity = 1; + while (capacity < size) { + capacity <<= 1; + } + } + + AutoArray buf; + try { + buf.reset(new char[sizeof(T) * capacity]); + } catch (const std::bad_alloc &) { + DARTS_THROW("failed to resize pool: std::bad_alloc"); + } + + if (size_ > 0) { + T *src = reinterpret_cast(&buf_[0]); + T *dest = reinterpret_cast(&buf[0]); + for (std::size_t i = 0; i < size_; ++i) { + new(&dest[i]) T(src[i]); + src[i].~T(); + } + } + + buf_.swap(&buf); + capacity_ = capacity; +} + +// +// Memory management of stack. +// + +template +class AutoStack { + public: + AutoStack() : pool_() {} + ~AutoStack() { + clear(); + } + + const T &top() const { + return pool_[size() - 1]; + } + T &top() { + return pool_[size() - 1]; + } + + bool empty() const { + return pool_.empty(); + } + std::size_t size() const { + return pool_.size(); + } + + void push(const T &value) { + pool_.push_back(value); + } + void pop() { + pool_.pop_back(); + } + + void clear() { + pool_.clear(); + } + + private: + AutoPool pool_; + + // Disallows copy and assignment. + AutoStack(const AutoStack &); + AutoStack &operator=(const AutoStack &); +}; + +// +// Succinct bit vector. 
+// + +class BitVector { + public: + BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {} + ~BitVector() { + clear(); + } + + bool operator[](std::size_t id) const { + return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1; + } + + id_type rank(std::size_t id) const { + std::size_t unit_id = id / UNIT_SIZE; + return ranks_[unit_id] + pop_count(units_[unit_id] + & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1))); + } + + void set(std::size_t id, bool bit) { + if (bit) { + units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE); + } else { + units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE)); + } + } + + bool empty() const { + return units_.empty(); + } + std::size_t num_ones() const { + return num_ones_; + } + std::size_t size() const { + return size_; + } + + void append() { + if ((size_ % UNIT_SIZE) == 0) { + units_.append(0); + } + ++size_; + } + void build(); + + void clear() { + units_.clear(); + ranks_.clear(); + } + + private: + enum { UNIT_SIZE = sizeof(id_type) * 8 }; + + AutoPool units_; + AutoArray ranks_; + std::size_t num_ones_; + std::size_t size_; + + // Disallows copy and assignment. + BitVector(const BitVector &); + BitVector &operator=(const BitVector &); + + static id_type pop_count(id_type unit) { + unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555); + unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333); + unit = ((unit >> 4) + unit) & 0x0F0F0F0F; + unit += unit >> 8; + unit += unit >> 16; + return unit & 0xFF; + } +}; + +inline void BitVector::build() { + try { + ranks_.reset(new id_type[units_.size()]); + } catch (const std::bad_alloc &) { + DARTS_THROW("failed to build rank index: std::bad_alloc"); + } + + num_ones_ = 0; + for (std::size_t i = 0; i < units_.size(); ++i) { + ranks_[i] = num_ones_; + num_ones_ += pop_count(units_[i]); + } +} + +// +// Keyset. 
+// + +template +class Keyset { + public: + Keyset(std::size_t num_keys, const char_type * const *keys, + const std::size_t *lengths, const T *values) : + num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {} + + std::size_t num_keys() const { + return num_keys_; + } + const char_type *keys(std::size_t id) const { + return keys_[id]; + } + uchar_type keys(std::size_t key_id, std::size_t char_id) const { + if (has_lengths() && char_id >= lengths_[key_id]) + return '\0'; + return keys_[key_id][char_id]; + } + + bool has_lengths() const { + return lengths_ != NULL; + } + std::size_t lengths(std::size_t id) const { + if (has_lengths()) { + return lengths_[id]; + } + std::size_t length = 0; + while (keys_[id][length] != '\0') { + ++length; + } + return length; + } + + bool has_values() const { + return values_ != NULL; + } + value_type values(std::size_t id) const { + if (has_values()) { + return static_cast(values_[id]); + } + return static_cast(id); + } + + private: + std::size_t num_keys_; + const char_type * const * keys_; + const std::size_t *lengths_; + const T *values_; + + // Disallows copy and assignment. + Keyset(const Keyset &); + Keyset &operator=(const Keyset &); +}; + +// +// Node of Directed Acyclic Word Graph (DAWG). 
+// + +class DawgNode { + public: + DawgNode() : child_(0), sibling_(0), label_('\0'), + is_state_(false), has_sibling_(false) {} + + void set_child(id_type child) { + child_ = child; + } + void set_sibling(id_type sibling) { + sibling_ = sibling; + } + void set_value(value_type value) { + child_ = value; + } + void set_label(uchar_type label) { + label_ = label; + } + void set_is_state(bool is_state) { + is_state_ = is_state; + } + void set_has_sibling(bool has_sibling) { + has_sibling_ = has_sibling; + } + + id_type child() const { + return child_; + } + id_type sibling() const { + return sibling_; + } + value_type value() const { + return static_cast(child_); + } + uchar_type label() const { + return label_; + } + bool is_state() const { + return is_state_; + } + bool has_sibling() const { + return has_sibling_; + } + + id_type unit() const { + if (label_ == '\0') { + return (child_ << 1) | (has_sibling_ ? 1 : 0); + } + return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0); + } + + private: + id_type child_; + id_type sibling_; + uchar_type label_; + bool is_state_; + bool has_sibling_; + + // Copyable. +}; + +// +// Fixed unit of Directed Acyclic Word Graph (DAWG). +// + +class DawgUnit { + public: + explicit DawgUnit(id_type unit = 0) : unit_(unit) {} + DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {} + + DawgUnit &operator=(id_type unit) { + unit_ = unit; + return *this; + } + + id_type unit() const { + return unit_; + } + + id_type child() const { + return unit_ >> 2; + } + bool has_sibling() const { + return (unit_ & 1) == 1; + } + value_type value() const { + return static_cast(unit_ >> 1); + } + bool is_state() const { + return (unit_ & 2) == 2; + } + + private: + id_type unit_; + + // Copyable. +}; + +// +// Directed Acyclic Word Graph (DAWG) builder. 
+// + +class DawgBuilder { + public: + DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(), + table_(), node_stack_(), recycle_bin_(), num_states_(0) {} + ~DawgBuilder() { + clear(); + } + + id_type root() const { + return 0; + } + + id_type child(id_type id) const { + return units_[id].child(); + } + id_type sibling(id_type id) const { + return units_[id].has_sibling() ? (id + 1) : 0; + } + int value(id_type id) const { + return units_[id].value(); + } + + bool is_leaf(id_type id) const { + return label(id) == '\0'; + } + uchar_type label(id_type id) const { + return labels_[id]; + } + + bool is_intersection(id_type id) const { + return is_intersections_[id]; + } + id_type intersection_id(id_type id) const { + return is_intersections_.rank(id) - 1; + } + + std::size_t num_intersections() const { + return is_intersections_.num_ones(); + } + + std::size_t size() const { + return units_.size(); + } + + void init(); + void finish(); + + void insert(const char *key, std::size_t length, value_type value); + + void clear(); + + private: + enum { INITIAL_TABLE_SIZE = 1 << 10 }; + + AutoPool nodes_; + AutoPool units_; + AutoPool labels_; + BitVector is_intersections_; + AutoPool table_; + AutoStack node_stack_; + AutoStack recycle_bin_; + std::size_t num_states_; + + // Disallows copy and assignment. 
+ DawgBuilder(const DawgBuilder &); + DawgBuilder &operator=(const DawgBuilder &); + + void flush(id_type id); + + void expand_table(); + + id_type find_unit(id_type id, id_type *hash_id) const; + id_type find_node(id_type node_id, id_type *hash_id) const; + + bool are_equal(id_type node_id, id_type unit_id) const; + + id_type hash_unit(id_type id) const; + id_type hash_node(id_type id) const; + + id_type append_node(); + id_type append_unit(); + + void free_node(id_type id) { + recycle_bin_.push(id); + } + + static id_type hash(id_type key) { + key = ~key + (key << 15); // key = (key << 15) - key - 1; + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key * 2057; // key = (key + (key << 3)) + (key << 11); + key = key ^ (key >> 16); + return key; + } +}; + +inline void DawgBuilder::init() { + table_.resize(INITIAL_TABLE_SIZE, 0); + + append_node(); + append_unit(); + + num_states_ = 1; + + nodes_[0].set_label(0xFF); + node_stack_.push(0); +} + +inline void DawgBuilder::finish() { + flush(0); + + units_[0] = nodes_[0].unit(); + labels_[0] = nodes_[0].label(); + + nodes_.clear(); + table_.clear(); + node_stack_.clear(); + recycle_bin_.clear(); + + is_intersections_.build(); +} + +inline void DawgBuilder::insert(const char *key, std::size_t length, + value_type value) { + if (value < 0) { + DARTS_THROW("failed to insert key: negative value"); + } else if (length == 0) { + DARTS_THROW("failed to insert key: zero-length key"); + } + + id_type id = 0; + std::size_t key_pos = 0; + + for ( ; key_pos <= length; ++key_pos) { + id_type child_id = nodes_[id].child(); + if (child_id == 0) { + break; + } + + uchar_type key_label = static_cast(key[key_pos]); + if (key_pos < length && key_label == '\0') { + DARTS_THROW("failed to insert key: invalid null character"); + } + + uchar_type unit_label = nodes_[child_id].label(); + if (key_label < unit_label) { + DARTS_THROW("failed to insert key: wrong key order"); + } else if (key_label > 
unit_label) { + nodes_[child_id].set_has_sibling(true); + flush(child_id); + break; + } + id = child_id; + } + + if (key_pos > length) { + return; + } + + for ( ; key_pos <= length; ++key_pos) { + uchar_type key_label = static_cast( + (key_pos < length) ? key[key_pos] : '\0'); + id_type child_id = append_node(); + + if (nodes_[id].child() == 0) { + nodes_[child_id].set_is_state(true); + } + nodes_[child_id].set_sibling(nodes_[id].child()); + nodes_[child_id].set_label(key_label); + nodes_[id].set_child(child_id); + node_stack_.push(child_id); + + id = child_id; + } + nodes_[id].set_value(value); +} + +inline void DawgBuilder::clear() { + nodes_.clear(); + units_.clear(); + labels_.clear(); + is_intersections_.clear(); + table_.clear(); + node_stack_.clear(); + recycle_bin_.clear(); + num_states_ = 0; +} + +inline void DawgBuilder::flush(id_type id) { + while (node_stack_.top() != id) { + id_type node_id = node_stack_.top(); + node_stack_.pop(); + + if (num_states_ >= table_.size() - (table_.size() >> 2)) { + expand_table(); + } + + id_type num_siblings = 0; + for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { + ++num_siblings; + } + + id_type hash_id; + id_type match_id = find_node(node_id, &hash_id); + if (match_id != 0) { + is_intersections_.set(match_id, true); + } else { + id_type unit_id = 0; + for (id_type i = 0; i < num_siblings; ++i) { + unit_id = append_unit(); + } + for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { + units_[unit_id] = nodes_[i].unit(); + labels_[unit_id] = nodes_[i].label(); + --unit_id; + } + match_id = unit_id + 1; + table_[hash_id] = match_id; + ++num_states_; + } + + for (id_type i = node_id, next; i != 0; i = next) { + next = nodes_[i].sibling(); + free_node(i); + } + + nodes_[node_stack_.top()].set_child(match_id); + } + node_stack_.pop(); +} + +inline void DawgBuilder::expand_table() { + std::size_t table_size = table_.size() << 1; + table_.clear(); + table_.resize(table_size, 0); + + for (std::size_t i = 1; i 
< units_.size(); ++i) { + id_type id = static_cast(i); + if (labels_[id] == '\0' || units_[id].is_state()) { + id_type hash_id; + find_unit(id, &hash_id); + table_[hash_id] = id; + } + } +} + +inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const { + *hash_id = hash_unit(id) % table_.size(); + for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { + id_type unit_id = table_[*hash_id]; + if (unit_id == 0) { + break; + } + + // There must not be the same unit. + } + return 0; +} + +inline id_type DawgBuilder::find_node(id_type node_id, + id_type *hash_id) const { + *hash_id = hash_node(node_id) % table_.size(); + for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { + id_type unit_id = table_[*hash_id]; + if (unit_id == 0) { + break; + } + + if (are_equal(node_id, unit_id)) { + return unit_id; + } + } + return 0; +} + +inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const { + for (id_type i = nodes_[node_id].sibling(); i != 0; + i = nodes_[i].sibling()) { + if (units_[unit_id].has_sibling() == false) { + return false; + } + ++unit_id; + } + if (units_[unit_id].has_sibling() == true) { + return false; + } + + for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) { + if (nodes_[i].unit() != units_[unit_id].unit() || + nodes_[i].label() != labels_[unit_id]) { + return false; + } + } + return true; +} + +inline id_type DawgBuilder::hash_unit(id_type id) const { + id_type hash_value = 0; + for ( ; id != 0; ++id) { + id_type unit = units_[id].unit(); + uchar_type label = labels_[id]; + hash_value ^= hash((label << 24) ^ unit); + + if (units_[id].has_sibling() == false) { + break; + } + } + return hash_value; +} + +inline id_type DawgBuilder::hash_node(id_type id) const { + id_type hash_value = 0; + for ( ; id != 0; id = nodes_[id].sibling()) { + id_type unit = nodes_[id].unit(); + uchar_type label = nodes_[id].label(); + hash_value ^= hash((label << 24) ^ unit); + } + return hash_value; +} + +inline id_type 
DawgBuilder::append_unit() { + is_intersections_.append(); + units_.append(); + labels_.append(); + + return static_cast(is_intersections_.size() - 1); +} + +inline id_type DawgBuilder::append_node() { + id_type id; + if (recycle_bin_.empty()) { + id = static_cast(nodes_.size()); + nodes_.append(); + } else { + id = recycle_bin_.top(); + nodes_[id] = DawgNode(); + recycle_bin_.pop(); + } + return id; +} + +// +// Unit of double-array builder. +// + +class DoubleArrayBuilderUnit { + public: + DoubleArrayBuilderUnit() : unit_(0) {} + + void set_has_leaf(bool has_leaf) { + if (has_leaf) { + unit_ |= 1U << 8; + } else { + unit_ &= ~(1U << 8); + } + } + void set_value(value_type value) { + unit_ = value | (1U << 31); + } + void set_label(uchar_type label) { + unit_ = (unit_ & ~0xFFU) | label; + } + void set_offset(id_type offset) { + if (offset >= 1U << 29) { + DARTS_THROW("failed to modify unit: too large offset"); + } + unit_ &= (1U << 31) | (1U << 8) | 0xFF; + if (offset < 1U << 21) { + unit_ |= (offset << 10); + } else { + unit_ |= (offset << 2) | (1U << 9); + } + } + + private: + id_type unit_; + + // Copyable. +}; + +// +// Extra unit of double-array builder. +// + +class DoubleArrayBuilderExtraUnit { + public: + DoubleArrayBuilderExtraUnit() : prev_(0), next_(0), + is_fixed_(false), is_used_(false) {} + + void set_prev(id_type prev) { + prev_ = prev; + } + void set_next(id_type next) { + next_ = next; + } + void set_is_fixed(bool is_fixed) { + is_fixed_ = is_fixed; + } + void set_is_used(bool is_used) { + is_used_ = is_used; + } + + id_type prev() const { + return prev_; + } + id_type next() const { + return next_; + } + bool is_fixed() const { + return is_fixed_; + } + bool is_used() const { + return is_used_; + } + + private: + id_type prev_; + id_type next_; + bool is_fixed_; + bool is_used_; + + // Copyable. +}; + +// +// DAWG -> double-array converter. 
+// + +class DoubleArrayBuilder { + public: + explicit DoubleArrayBuilder(progress_func_type progress_func) + : progress_func_(progress_func), units_(), extras_(), labels_(), + table_(), extras_head_(0) {} + ~DoubleArrayBuilder() { + clear(); + } + + template + void build(const Keyset &keyset); + void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const; + + void clear(); + + private: + enum { BLOCK_SIZE = 256 }; + enum { NUM_EXTRA_BLOCKS = 16 }; + enum { NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS }; + + enum { UPPER_MASK = 0xFF << 21 }; + enum { LOWER_MASK = 0xFF }; + + typedef DoubleArrayBuilderUnit unit_type; + typedef DoubleArrayBuilderExtraUnit extra_type; + + progress_func_type progress_func_; + AutoPool units_; + AutoArray extras_; + AutoPool labels_; + AutoArray table_; + id_type extras_head_; + + // Disallows copy and assignment. + DoubleArrayBuilder(const DoubleArrayBuilder &); + DoubleArrayBuilder &operator=(const DoubleArrayBuilder &); + + std::size_t num_blocks() const { + return units_.size() / BLOCK_SIZE; + } + + const extra_type &extras(id_type id) const { + return extras_[id % NUM_EXTRAS]; + } + extra_type &extras(id_type id) { + return extras_[id % NUM_EXTRAS]; + } + + template + void build_dawg(const Keyset &keyset, DawgBuilder *dawg_builder); + void build_from_dawg(const DawgBuilder &dawg); + void build_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id); + id_type arrange_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id); + + template + void build_from_keyset(const Keyset &keyset); + template + void build_from_keyset(const Keyset &keyset, std::size_t begin, + std::size_t end, std::size_t depth, id_type dic_id); + template + id_type arrange_from_keyset(const Keyset &keyset, std::size_t begin, + std::size_t end, std::size_t depth, id_type dic_id); + + id_type find_valid_offset(id_type id) const; + bool is_valid_offset(id_type id, id_type offset) const; + + void reserve_id(id_type id); + void 
expand_units(); + + void fix_all_blocks(); + void fix_block(id_type block_id); +}; + +template +void DoubleArrayBuilder::build(const Keyset &keyset) { + if (keyset.has_values()) { + Details::DawgBuilder dawg_builder; + build_dawg(keyset, &dawg_builder); + build_from_dawg(dawg_builder); + dawg_builder.clear(); + } else { + build_from_keyset(keyset); + } +} + +inline void DoubleArrayBuilder::copy(std::size_t *size_ptr, + DoubleArrayUnit **buf_ptr) const { + if (size_ptr != NULL) { + *size_ptr = units_.size(); + } + if (buf_ptr != NULL) { + *buf_ptr = new DoubleArrayUnit[units_.size()]; + unit_type *units = reinterpret_cast(*buf_ptr); + for (std::size_t i = 0; i < units_.size(); ++i) { + units[i] = units_[i]; + } + } +} + +inline void DoubleArrayBuilder::clear() { + units_.clear(); + extras_.clear(); + labels_.clear(); + table_.clear(); + extras_head_ = 0; +} + +template +void DoubleArrayBuilder::build_dawg(const Keyset &keyset, + DawgBuilder *dawg_builder) { + dawg_builder->init(); + for (std::size_t i = 0; i < keyset.num_keys(); ++i) { + dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i)); + if (progress_func_ != NULL) { + progress_func_(i + 1, keyset.num_keys() + 1); + } + } + dawg_builder->finish(); +} + +inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) { + std::size_t num_units = 1; + while (num_units < dawg.size()) { + num_units <<= 1; + } + units_.reserve(num_units); + + table_.reset(new id_type[dawg.num_intersections()]); + for (std::size_t i = 0; i < dawg.num_intersections(); ++i) { + table_[i] = 0; + } + + extras_.reset(new extra_type[NUM_EXTRAS]); + + reserve_id(0); + extras(0).set_is_used(true); + units_[0].set_offset(1); + units_[0].set_label('\0'); + + if (dawg.child(dawg.root()) != 0) { + build_from_dawg(dawg, dawg.root(), 0); + } + + fix_all_blocks(); + + extras_.clear(); + labels_.clear(); + table_.clear(); +} + +inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, 
id_type dic_id) { + id_type dawg_child_id = dawg.child(dawg_id); + if (dawg.is_intersection(dawg_child_id)) { + id_type intersection_id = dawg.intersection_id(dawg_child_id); + id_type offset = table_[intersection_id]; + if (offset != 0) { + offset ^= dic_id; + if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) { + if (dawg.is_leaf(dawg_child_id)) { + units_[dic_id].set_has_leaf(true); + } + units_[dic_id].set_offset(offset); + return; + } + } + } + + id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id); + if (dawg.is_intersection(dawg_child_id)) { + table_[dawg.intersection_id(dawg_child_id)] = offset; + } + + do { + uchar_type child_label = dawg.label(dawg_child_id); + id_type dic_child_id = offset ^ child_label; + if (child_label != '\0') { + build_from_dawg(dawg, dawg_child_id, dic_child_id); + } + dawg_child_id = dawg.sibling(dawg_child_id); + } while (dawg_child_id != 0); +} + +inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id) { + labels_.resize(0); + + id_type dawg_child_id = dawg.child(dawg_id); + while (dawg_child_id != 0) { + labels_.append(dawg.label(dawg_child_id)); + dawg_child_id = dawg.sibling(dawg_child_id); + } + + id_type offset = find_valid_offset(dic_id); + units_[dic_id].set_offset(dic_id ^ offset); + + dawg_child_id = dawg.child(dawg_id); + for (std::size_t i = 0; i < labels_.size(); ++i) { + id_type dic_child_id = offset ^ labels_[i]; + reserve_id(dic_child_id); + + if (dawg.is_leaf(dawg_child_id)) { + units_[dic_id].set_has_leaf(true); + units_[dic_child_id].set_value(dawg.value(dawg_child_id)); + } else { + units_[dic_child_id].set_label(labels_[i]); + } + + dawg_child_id = dawg.sibling(dawg_child_id); + } + extras(offset).set_is_used(true); + + return offset; +} + +template +void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset) { + std::size_t num_units = 1; + while (num_units < keyset.num_keys()) { + num_units <<= 1; + } + units_.reserve(num_units); + + 
extras_.reset(new extra_type[NUM_EXTRAS]); + + reserve_id(0); + extras(0).set_is_used(true); + units_[0].set_offset(1); + units_[0].set_label('\0'); + + if (keyset.num_keys() > 0) { + build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0); + } + + fix_all_blocks(); + + extras_.clear(); + labels_.clear(); +} + +template +void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset, + std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { + id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id); + + while (begin < end) { + if (keyset.keys(begin, depth) != '\0') { + break; + } + ++begin; + } + if (begin == end) { + return; + } + + std::size_t last_begin = begin; + uchar_type last_label = keyset.keys(begin, depth); + while (++begin < end) { + uchar_type label = keyset.keys(begin, depth); + if (label != last_label) { + build_from_keyset(keyset, last_begin, begin, + depth + 1, offset ^ last_label); + last_begin = begin; + last_label = keyset.keys(begin, depth); + } + } + build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label); +} + +template +id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset &keyset, + std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { + labels_.resize(0); + + value_type value = -1; + for (std::size_t i = begin; i < end; ++i) { + uchar_type label = keyset.keys(i, depth); + if (label == '\0') { + if (keyset.has_lengths() && depth < keyset.lengths(i)) { + DARTS_THROW("failed to build double-array: " + "invalid null character"); + } else if (keyset.values(i) < 0) { + DARTS_THROW("failed to build double-array: negative value"); + } + + if (value == -1) { + value = keyset.values(i); + } + if (progress_func_ != NULL) { + progress_func_(i + 1, keyset.num_keys() + 1); + } + } + + if (labels_.empty()) { + labels_.append(label); + } else if (label != labels_[labels_.size() - 1]) { + if (label < labels_[labels_.size() - 1]) { + DARTS_THROW("failed to build double-array: wrong 
key order"); + } + labels_.append(label); + } + } + + id_type offset = find_valid_offset(dic_id); + units_[dic_id].set_offset(dic_id ^ offset); + + for (std::size_t i = 0; i < labels_.size(); ++i) { + id_type dic_child_id = offset ^ labels_[i]; + reserve_id(dic_child_id); + if (labels_[i] == '\0') { + units_[dic_id].set_has_leaf(true); + units_[dic_child_id].set_value(value); + } else { + units_[dic_child_id].set_label(labels_[i]); + } + } + extras(offset).set_is_used(true); + + return offset; +} + +inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const { + if (extras_head_ >= units_.size()) { + return units_.size() | (id & LOWER_MASK); + } + + id_type unfixed_id = extras_head_; + do { + id_type offset = unfixed_id ^ labels_[0]; + if (is_valid_offset(id, offset)) { + return offset; + } + unfixed_id = extras(unfixed_id).next(); + } while (unfixed_id != extras_head_); + + return units_.size() | (id & LOWER_MASK); +} + +inline bool DoubleArrayBuilder::is_valid_offset(id_type id, + id_type offset) const { + if (extras(offset).is_used()) { + return false; + } + + id_type rel_offset = id ^ offset; + if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) { + return false; + } + + for (std::size_t i = 1; i < labels_.size(); ++i) { + if (extras(offset ^ labels_[i]).is_fixed()) { + return false; + } + } + + return true; +} + +inline void DoubleArrayBuilder::reserve_id(id_type id) { + if (id >= units_.size()) { + expand_units(); + } + + if (id == extras_head_) { + extras_head_ = extras(id).next(); + if (extras_head_ == id) { + extras_head_ = units_.size(); + } + } + extras(extras(id).prev()).set_next(extras(id).next()); + extras(extras(id).next()).set_prev(extras(id).prev()); + extras(id).set_is_fixed(true); +} + +inline void DoubleArrayBuilder::expand_units() { + id_type src_num_units = units_.size(); + id_type src_num_blocks = num_blocks(); + + id_type dest_num_units = src_num_units + BLOCK_SIZE; + id_type dest_num_blocks = src_num_blocks + 1; + + if 
(dest_num_blocks > NUM_EXTRA_BLOCKS) { + fix_block(src_num_blocks - NUM_EXTRA_BLOCKS); + } + + units_.resize(dest_num_units); + + if (dest_num_blocks > NUM_EXTRA_BLOCKS) { + for (std::size_t id = src_num_units; id < dest_num_units; ++id) { + extras(id).set_is_used(false); + extras(id).set_is_fixed(false); + } + } + + for (id_type i = src_num_units + 1; i < dest_num_units; ++i) { + extras(i - 1).set_next(i); + extras(i).set_prev(i - 1); + } + + extras(src_num_units).set_prev(dest_num_units - 1); + extras(dest_num_units - 1).set_next(src_num_units); + + extras(src_num_units).set_prev(extras(extras_head_).prev()); + extras(dest_num_units - 1).set_next(extras_head_); + + extras(extras(extras_head_).prev()).set_next(src_num_units); + extras(extras_head_).set_prev(dest_num_units - 1); +} + +inline void DoubleArrayBuilder::fix_all_blocks() { + id_type begin = 0; + if (num_blocks() > NUM_EXTRA_BLOCKS) { + begin = num_blocks() - NUM_EXTRA_BLOCKS; + } + id_type end = num_blocks(); + + for (id_type block_id = begin; block_id != end; ++block_id) { + fix_block(block_id); + } +} + +inline void DoubleArrayBuilder::fix_block(id_type block_id) { + id_type begin = block_id * BLOCK_SIZE; + id_type end = begin + BLOCK_SIZE; + + id_type unused_offset = 0; + for (id_type offset = begin; offset != end; ++offset) { + if (!extras(offset).is_used()) { + unused_offset = offset; + break; + } + } + + for (id_type id = begin; id != end; ++id) { + if (!extras(id).is_fixed()) { + reserve_id(id); + units_[id].set_label(static_cast(id ^ unused_offset)); + } + } +} + +} // namespace Details + +// +// Member function build() of DoubleArrayImpl. 
+// + +template +int DoubleArrayImpl::build(std::size_t num_keys, + const key_type * const *keys, const std::size_t *lengths, + const value_type *values, Details::progress_func_type progress_func) { + Details::Keyset keyset(num_keys, keys, lengths, values); + + Details::DoubleArrayBuilder builder(progress_func); + builder.build(keyset); + + std::size_t size = 0; + unit_type *buf = NULL; + builder.copy(&size, &buf); + + clear(); + + size_ = size; + array_ = buf; + buf_ = buf; + + if (progress_func != NULL) { + progress_func(num_keys + 1, num_keys + 1); + } + + return 0; +} + +} // namespace Darts + +#undef DARTS_INT_TO_STR +#undef DARTS_LINE_TO_STR +#undef DARTS_LINE_STR +#undef DARTS_THROW + +#endif // DARTS_H_ diff --git a/deps/rapidjson-0.11/document.h b/deps/rapidjson-0.11/document.h new file mode 100755 index 0000000..402b65d --- /dev/null +++ b/deps/rapidjson-0.11/document.h @@ -0,0 +1,821 @@ +#ifndef RAPIDJSON_DOCUMENT_H_ +#define RAPIDJSON_DOCUMENT_H_ + +#include "reader.h" +#include "internal/strfunc.h" +#include // placement new + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use the Value if UTF8 and default allocator + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +#pragma pack (push, 4) +template > +class GenericValue { +public: + //! Name-value pair in an object. + struct Member { + GenericValue name; //!< name of member (must be a string) + GenericValue value; //!< value of member. 
+ }; + + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef Member* MemberIterator; //!< Member iterator for iterating in object. + typedef const Member* ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + + //!@name Constructors and destructor. + //@{ + + //! Default constructor creates a null value. + GenericValue() : flags_(kNullFlag) {} + + //! Copy constructor is not permitted. +private: + GenericValue(const GenericValue& rhs); + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + GenericValue(Type type) { + static const unsigned defaultFlags[7] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kConstStringFlag, + kNumberFlag | kIntFlag | kUintFlag | kInt64Flag | kUint64Flag | kDoubleFlag + }; + RAPIDJSON_ASSERT(type <= kNumberType); + flags_ = defaultFlags[type]; + memset(&data_, 0, sizeof(data_)); + } + + //! Constructor for boolean value. + GenericValue(bool b) : flags_(b ? kTrueFlag : kFalseFlag) {} + + //! Constructor for int value. + GenericValue(int i) : flags_(kNumberIntFlag) { + data_.n.i64 = i; + if (i >= 0) + flags_ |= kUintFlag | kUint64Flag; + } + + //! Constructor for unsigned value. + GenericValue(unsigned u) : flags_(kNumberUintFlag) { + data_.n.u64 = u; + if (!(u & 0x80000000)) + flags_ |= kIntFlag | kInt64Flag; + } + + //! Constructor for int64_t value. 
+ GenericValue(int64_t i64) : flags_(kNumberInt64Flag) { + data_.n.i64 = i64; + if (i64 >= 0) { + flags_ |= kNumberUint64Flag; + if (!(i64 & 0xFFFFFFFF00000000LL)) + flags_ |= kUintFlag; + if (!(i64 & 0xFFFFFFFF80000000LL)) + flags_ |= kIntFlag; + } + else if (i64 >= -2147483648LL) + flags_ |= kIntFlag; + } + + //! Constructor for uint64_t value. + GenericValue(uint64_t u64) : flags_(kNumberUint64Flag) { + data_.n.u64 = u64; + if (!(u64 & 0x8000000000000000ULL)) + flags_ |= kInt64Flag; + if (!(u64 & 0xFFFFFFFF00000000ULL)) + flags_ |= kUintFlag; + if (!(u64 & 0xFFFFFFFF80000000ULL)) + flags_ |= kIntFlag; + } + + //! Constructor for double value. + GenericValue(double d) : flags_(kNumberDoubleFlag) { data_.n.d = d; } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kConstStringFlag; + data_.s.str = s; + data_.s.length = length; + } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s) { SetStringRaw(s, internal::StrLen(s)); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator) { SetStringRaw(s, length, allocator); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) { SetStringRaw(s, internal::StrLen(s), allocator); } + + //! Destructor. + /*! Need to destruct elements of array, members of object, or copy-string. 
+ */ + ~GenericValue() { + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + switch(flags_) { + case kArrayFlag: + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + v->~GenericValue(); + Allocator::Free(data_.a.elements); + break; + + case kObjectFlag: + for (Member* m = data_.o.members; m != data_.o.members + data_.o.size; ++m) { + m->name.~GenericValue(); + m->value.~GenericValue(); + } + Allocator::Free(data_.o.members); + break; + + case kCopyStringFlag: + Allocator::Free(const_cast(data_.s.str)); + break; + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) { + RAPIDJSON_ASSERT(this != &rhs); + this->~GenericValue(); + memcpy(this, &rhs, sizeof(GenericValue)); + rhs.flags_ = kNullFlag; + return *this; + } + + //! Assignment with primitive types. + /*! \tparam T Either Type, int, unsigned, int64_t, uint64_t, const Ch* + \param value The value to be assigned. 
+ */ + template + GenericValue& operator=(T value) { + this->~GenericValue(); + new (this) GenericValue(value); + return *this; + } + //@} + + //!@name Type + //@{ + + Type GetType() const { return static_cast(flags_ & kTypeMask); } + bool IsNull() const { return flags_ == kNullFlag; } + bool IsFalse() const { return flags_ == kFalseFlag; } + bool IsTrue() const { return flags_ == kTrueFlag; } + bool IsBool() const { return (flags_ & kBoolFlag) != 0; } + bool IsObject() const { return flags_ == kObjectFlag; } + bool IsArray() const { return flags_ == kArrayFlag; } + bool IsNumber() const { return (flags_ & kNumberFlag) != 0; } + bool IsInt() const { return (flags_ & kIntFlag) != 0; } + bool IsUint() const { return (flags_ & kUintFlag) != 0; } + bool IsInt64() const { return (flags_ & kInt64Flag) != 0; } + bool IsUint64() const { return (flags_ & kUint64Flag) != 0; } + bool IsDouble() const { return (flags_ & kDoubleFlag) != 0; } + bool IsString() const { return (flags_ & kStringFlag) != 0; } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + //!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return flags_ == kTrueFlag; } + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the value associated with the object's name. + GenericValue& operator[](const Ch* name) { + if (Member* member = FindMember(name)) + return member->value; + else { + static GenericValue NullValue; + return NullValue; + } + } + const GenericValue& operator[](const Ch* name) const { return const_cast(*this)[name]; } + + //! Member iterators. 
+ ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.members; } + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.members + data_.o.size; } + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return data_.o.members; } + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return data_.o.members + data_.o.size; } + + //! Check whether a member exists in the object. + bool HasMember(const Ch* name) const { return FindMember(name) != 0; } + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. + \return The value itself for fluent API. + \note The ownership of name and value will be transfered to this object if success. + */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + Object& o = data_.o; + if (o.size >= o.capacity) { + if (o.capacity == 0) { + o.capacity = kDefaultObjectCapacity; + o.members = (Member*)allocator.Malloc(o.capacity * sizeof(Member)); + } + else { + SizeType oldCapacity = o.capacity; + o.capacity *= 2; + o.members = (Member*)allocator.Realloc(o.members, oldCapacity * sizeof(Member), o.capacity * sizeof(Member)); + } + } + o.members[o.size].name.RawAssign(name); + o.members[o.size].value.RawAssign(value); + o.size++; + return *this; + } + + GenericValue& AddMember(const Ch* name, Allocator& nameAllocator, GenericValue& value, Allocator& allocator) { + GenericValue n(name, internal::StrLen(name), nameAllocator); + return AddMember(n, value, allocator); + } + + GenericValue& AddMember(const Ch* name, GenericValue& value, Allocator& allocator) { + GenericValue n(name, internal::StrLen(name)); + return AddMember(n, value, allocator); + } + + template + GenericValue& AddMember(const Ch* name, T value, Allocator& 
allocator) { + GenericValue n(name, internal::StrLen(name)); + GenericValue v(value); + return AddMember(n, v, allocator); + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Removing member is implemented by moving the last member. So the ordering of members is changed. + */ + bool RemoveMember(const Ch* name) { + RAPIDJSON_ASSERT(IsObject()); + if (Member* m = FindMember(name)) { + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(data_.o.members != 0); + + Member* last = data_.o.members + (data_.o.size - 1); + if (data_.o.size > 1 && m != last) { + // Move the last one to this place + m->name = last->name; + m->value = last->value; + } + else { + // Only one left, just destroy + m->name.~GenericValue(); + m->value.~GenericValue(); + } + --data_.o.size; + return true; + } + return false; + } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged. + */ + void Clear() { + RAPIDJSON_ASSERT(IsArray()); + for (SizeType i = 0; i < data_.a.size; ++i) + data_.a.elements[i].~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \param index Zero-based index of element. 
+ \note +\code +Value a(kArrayType); +a.PushBack(123); +int x = a[0].GetInt(); // Error: operator[ is ambiguous, as 0 also mean a null pointer of const char* type. +int y = a[SizeType(0)].GetInt(); // Cast to SizeType will work. +int z = a[0u].GetInt(); // This works too. +\endcode + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return data_.a.elements[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast(*this)[index]; } + + //! Element iterator + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements; } + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements + data_.a.size; } + ConstValueIterator Begin() const { return const_cast(*this).Begin(); } + ConstValueIterator End() const { return const_cast(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The capacity that the array at least need to have. + \param allocator The allocator for allocating memory. It must be the same one use previously. + \return The value itself for fluent API. + */ + GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (newCapacity > data_.a.capacity) { + data_.a.elements = (GenericValue*)allocator.Realloc(data_.a.elements, data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue)); + data_.a.capacity = newCapacity; + } + return *this; + } + + //! Append a value at the end of the array. + /*! \param value The value to be appended. + \param allocator The allocator for allocating memory. It must be the same one use previously. + \return The value itself for fluent API. + \note The ownership of the value will be transfered to this object if success. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. 
+ */ + GenericValue& PushBack(GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (data_.a.size >= data_.a.capacity) + Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : data_.a.capacity * 2, allocator); + data_.a.elements[data_.a.size++].RawAssign(value); + return *this; + } + + template + GenericValue& PushBack(T value, Allocator& allocator) { + GenericValue v(value); + return PushBack(v, allocator); + } + + //! Remove the last element in the array. + GenericValue& PopBack() { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(!Empty()); + data_.a.elements[--data_.a.size].~GenericValue(); + return *this; + } + //@} + + //!@name Number + //@{ + + int GetInt() const { RAPIDJSON_ASSERT(flags_ & kIntFlag); return data_.n.i.i; } + unsigned GetUint() const { RAPIDJSON_ASSERT(flags_ & kUintFlag); return data_.n.u.u; } + int64_t GetInt64() const { RAPIDJSON_ASSERT(flags_ & kInt64Flag); return data_.n.i64; } + uint64_t GetUint64() const { RAPIDJSON_ASSERT(flags_ & kUint64Flag); return data_.n.u64; } + + double GetDouble() const { + RAPIDJSON_ASSERT(IsNumber()); + if ((flags_ & kDoubleFlag) != 0) return data_.n.d; // exact type, no conversion. 
+ if ((flags_ & kIntFlag) != 0) return data_.n.i.i; // int -> double + if ((flags_ & kUintFlag) != 0) return data_.n.u.u; // unsigned -> double + if ((flags_ & kInt64Flag) != 0) return (double)data_.n.i64; // int64_t -> double (may lose precision) + RAPIDJSON_ASSERT((flags_ & kUint64Flag) != 0); return (double)data_.n.u64; // uint64_t -> double (may lose precision) + } + + GenericValue& SetInt(int i) { this->~GenericValue(); new (this) GenericValue(i); return *this; } + GenericValue& SetUint(unsigned u) { this->~GenericValue(); new (this) GenericValue(u); return *this; } + GenericValue& SetInt64(int64_t i64) { this->~GenericValue(); new (this) GenericValue(i64); return *this; } + GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; } + GenericValue& SetDouble(double d) { this->~GenericValue(); new (this) GenericValue(d); return *this; } + + //@} + + //!@name String + //@{ + + const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return data_.s.str; } + + //! Get the length of string. + /*! Since rapidjson permits "\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). + */ + SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return data_.s.length; } + + //! Set this value as a string without copying source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string pointer. + \param length The length of source string, excluding the trailing null terminator. + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, SizeType length) { this->~GenericValue(); SetStringRaw(s, length); return *this; } + + //! Set this value as a string without copying source string. + /*! \param s source string pointer. + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s) { return SetString(s, internal::StrLen(s)); } + + //! 
Set this value as a string by copying from source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string. + \param length The length of source string, excluding the trailing null terminator. + \param allocator Allocator for allocating copied buffer. Commonly use document.GetAllocator(). + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, length, allocator); return *this; } + + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use document.GetAllocator(). + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, Allocator& allocator) { SetString(s, internal::StrLen(s), allocator); return *this; } + + //@} + + //! Generate events of this value to a Handler. + /*! This function adopts the GoF visitor pattern. + Typical usage is to output this JSON value as JSON text via Writer, which is a Handler. + It can also be used to deep clone this value via GenericDocument, which is also a Handler. + \tparam Handler type of handler. + \param handler An object implementing concept Handler. 
+ */ + template + const GenericValue& Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: handler.Null(); break; + case kFalseType: handler.Bool(false); break; + case kTrueType: handler.Bool(true); break; + + case kObjectType: + handler.StartObject(); + for (Member* m = data_.o.members; m != data_.o.members + data_.o.size; ++m) { + handler.String(m->name.data_.s.str, m->name.data_.s.length, false); + m->value.Accept(handler); + } + handler.EndObject(data_.o.size); + break; + + case kArrayType: + handler.StartArray(); + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + v->Accept(handler); + handler.EndArray(data_.a.size); + break; + + case kStringType: + handler.String(data_.s.str, data_.s.length, false); + break; + + case kNumberType: + if (IsInt()) handler.Int(data_.n.i.i); + else if (IsUint()) handler.Uint(data_.n.u.u); + else if (IsInt64()) handler.Int64(data_.n.i64); + else if (IsUint64()) handler.Uint64(data_.n.u64); + else handler.Double(data_.n.d); + break; + } + return *this; + } + +private: + template + friend class GenericDocument; + + enum { + kBoolFlag = 0x100, + kNumberFlag = 0x200, + kIntFlag = 0x400, + kUintFlag = 0x800, + kInt64Flag = 0x1000, + kUint64Flag = 0x2000, + kDoubleFlag = 0x4000, + kStringFlag = 0x100000, + kCopyFlag = 0x200000, + + // Initial flags of different types. 
+ kNullFlag = kNullType, + kTrueFlag = kTrueType | kBoolFlag, + kFalseFlag = kFalseType | kBoolFlag, + kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, + kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, + kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag, + kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag, + kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag, + kConstStringFlag = kStringType | kStringFlag, + kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, + kObjectFlag = kObjectType, + kArrayFlag = kArrayType, + + kTypeMask = 0xFF // bitwise-and with mask of 0xFF can be optimized by compiler + }; + + static const SizeType kDefaultArrayCapacity = 16; + static const SizeType kDefaultObjectCapacity = 16; + + struct String { + const Ch* str; + SizeType length; + unsigned hashcode; //!< reserved + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct Object { + Member* members; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct Array { + GenericValue* elements; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + union Data { + String s; + Number n; + Object o; + Array a; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + //! Find member by name. 
+ Member* FindMember(const Ch* name) { + RAPIDJSON_ASSERT(name); + RAPIDJSON_ASSERT(IsObject()); + + SizeType length = internal::StrLen(name); + + Object& o = data_.o; + for (Member* member = o.members; member != data_.o.members + data_.o.size; ++member) + if (length == member->name.data_.s.length && memcmp(member->name.data_.s.str, name, length * sizeof(Ch)) == 0) + return member; + + return 0; + } + const Member* FindMember(const Ch* name) const { return const_cast(*this).FindMember(name); } + + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& alloctaor) { + flags_ = kArrayFlag; + data_.a.elements = (GenericValue*)alloctaor.Malloc(count * sizeof(GenericValue)); + memcpy(data_.a.elements, values, count * sizeof(GenericValue)); + data_.a.size = data_.a.capacity = count; + } + + //! Initialize this value as object with initial data, without calling destructor. + void SetObjectRaw(Member* members, SizeType count, Allocator& alloctaor) { + flags_ = kObjectFlag; + data_.o.members = (Member*)alloctaor.Malloc(count * sizeof(Member)); + memcpy(data_.o.members, members, count * sizeof(Member)); + data_.o.size = data_.o.capacity = count; + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(const Ch* s, SizeType length) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kConstStringFlag; + data_.s.str = s; + data_.s.length = length; + } + + //! Initialize this value as copy string with initial data, without calling destructor. + void SetStringRaw(const Ch* s, SizeType length, Allocator& allocator) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kCopyStringFlag; + data_.s.str = (Ch *)allocator.Malloc((length + 1) * sizeof(Ch)); + data_.s.length = length; + memcpy(const_cast(data_.s.str), s, length * sizeof(Ch)); + const_cast(data_.s.str)[length] = '\0'; + } + + //! 
Assignment without calling destructor + void RawAssign(GenericValue& rhs) { + memcpy(this, &rhs, sizeof(GenericValue)); + rhs.flags_ = kNullFlag; + } + + Data data_; + unsigned flags_; +}; +#pragma pack (pop) + +//! Value with UTF8 encoding. +typedef GenericValue > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \implements Handler + \tparam Encoding encoding for both parsing and string storage. + \tparam Alloactor allocator for allocating memory for the DOM, and the stack during parsing. +*/ +template > +class GenericDocument : public GenericValue { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericValue ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! Constructor + /*! \param allocator Optional allocator for allocating stack memory. + \param stackCapacity Initial capacity of stack in bytes. + */ + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseError_(0), errorOffset_(0) {} + + //! Parse JSON text from an input stream. + /*! \tparam parseFlags Combination of ParseFlag. + \param stream Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(Stream& stream) { + ValueType::SetNull(); // Remove existing root if exist + GenericReader reader; + if (reader.template Parse(stream, *this)) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + this->RawAssign(*stack_.template Pop(1)); // Add this-> to prevent issue 13. + parseError_ = 0; + errorOffset_ = 0; + } + else { + parseError_ = reader.GetParseError(); + errorOffset_ = reader.GetErrorOffset(); + ClearStack(); + } + return *this; + } + + //! Parse JSON text from a mutable string. 
+ /*! \tparam parseFlags Combination of ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a read-only string. + /*! \tparam parseFlags Combination of ParseFlag (must not contain kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream s(str); + return ParseStream(s); + } + + //! Whether a parse error was occured in the last parsing. + bool HasParseError() const { return parseError_ != 0; } + + //! Get the message of parsing error. + const char* GetParseError() const { return parseError_; } + + //! Get the offset in character of the parsing error. + size_t GetErrorOffset() const { return errorOffset_; } + + //! Get the allocator of this document. + Allocator& GetAllocator() { return stack_.GetAllocator(); } + + //! Get the capacity of stack in bytes. 
+ size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + friend class GenericReader; // for Reader to call the following private handler functions + + // Implementation of Handler + void Null() { new (stack_.template Push()) ValueType(); } + void Bool(bool b) { new (stack_.template Push()) ValueType(b); } + void Int(int i) { new (stack_.template Push()) ValueType(i); } + void Uint(unsigned i) { new (stack_.template Push()) ValueType(i); } + void Int64(int64_t i) { new (stack_.template Push()) ValueType(i); } + void Uint64(uint64_t i) { new (stack_.template Push()) ValueType(i); } + void Double(double d) { new (stack_.template Push()) ValueType(d); } + + void String(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push()) ValueType(str, length); + } + + void StartObject() { new (stack_.template Push()) ValueType(kObjectType); } + + void EndObject(SizeType memberCount) { + typename ValueType::Member* members = stack_.template Pop(memberCount); + stack_.template Top()->SetObjectRaw(members, (SizeType)memberCount, GetAllocator()); + } + + void StartArray() { new (stack_.template Push()) ValueType(kArrayType); } + + void EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop(elementCount); + stack_.template Top()->SetArrayRaw(elements, elementCount, GetAllocator()); + } + + void ClearStack() { + if (Allocator::kNeedFree) + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop(1))->~ValueType(); + else + stack_.Clear(); + } + + static const size_t kDefaultStackCapacity = 1024; + internal::Stack stack_; + const char* parseError_; + size_t errorOffset_; +}; + +typedef GenericDocument > Document; + +} // namespace rapidjson + +#ifdef 
_MSC_VER +#pragma warning(pop) +#endif + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/deps/rapidjson-0.11/filestream.h b/deps/rapidjson-0.11/filestream.h new file mode 100755 index 0000000..24573aa --- /dev/null +++ b/deps/rapidjson-0.11/filestream.h @@ -0,0 +1,46 @@ +#ifndef RAPIDJSON_FILESTREAM_H_ +#define RAPIDJSON_FILESTREAM_H_ + +#include + +namespace rapidjson { + +//! Wrapper of C file stream for input or output. +/*! + This simple wrapper does not check the validity of the stream. + \implements Stream +*/ +class FileStream { +public: + typedef char Ch; //!< Character type. Only support char. + + FileStream(FILE* fp) : fp_(fp), count_(0) { Read(); } + char Peek() const { return current_; } + char Take() { char c = current_; Read(); return c; } + size_t Tell() const { return count_; } + void Put(char c) { fputc(c, fp_); } + + // Not implemented + char* PutBegin() { return 0; } + size_t PutEnd(char*) { return 0; } + +private: + void Read() { + RAPIDJSON_ASSERT(fp_ != 0); + int c = fgetc(fp_); + if (c != EOF) { + current_ = (char)c; + count_++; + } + else + current_ = '\0'; + } + + FILE* fp_; + char current_; + size_t count_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/deps/rapidjson-0.11/internal/pow10.h b/deps/rapidjson-0.11/internal/pow10.h new file mode 100755 index 0000000..0852539 --- /dev/null +++ b/deps/rapidjson-0.11/internal/pow10.h @@ -0,0 +1,54 @@ +#ifndef RAPIDJSON_POW10_ +#define RAPIDJSON_POW10_ + +namespace rapidjson { +namespace internal { + +//! Computes integer powers of 10 in double (10.0^n). +/*! This function uses lookup table for fast and accurate results. + \param n positive/negative exponent. Must <= 308. 
+ \return 10.0^n +*/ +inline double Pow10(int n) { + static const double e[] = { // 1e-308...1e308: 617 * 8 bytes = 4936 bytes + 1e-308,1e-307,1e-306,1e-305,1e-304,1e-303,1e-302,1e-301,1e-300, + 1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,1e-283,1e-282,1e-281,1e-280, + 1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,1e-263,1e-262,1e-261,1e-260, + 1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,1e-243,1e-242,1e-241,1e-240, + 1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,1e-223,1e-222,1e-221,1e-220, + 1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,1e-203,1e-202,1e-201,1e-200, + 1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,1e-183,1e-182,1e-181,1e-180, + 1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,1e-163,1e-162,1e-161,1e-160, + 1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,1e-143,1e-142,1e-141,1e-140, + 1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,1e-123,1e-122,1e-121,1e-120, + 1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,1e-103,1e-102,1e-101,1e-100, + 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, + 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, + 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 
1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, + 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, + 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e+0, + 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120, + 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140, + 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160, + 1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180, + 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200, + 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220, + 
1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240, + 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260, + 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280, + 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, + 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 + }; + RAPIDJSON_ASSERT(n <= 308); + return n < -308 ? 0.0 : e[n + 308]; +} + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_POW10_ diff --git a/deps/rapidjson-0.11/internal/stack.h b/deps/rapidjson-0.11/internal/stack.h new file mode 100755 index 0000000..3138b96 --- /dev/null +++ b/deps/rapidjson-0.11/internal/stack.h @@ -0,0 +1,82 @@ +#ifndef RAPIDJSON_INTERNAL_STACK_H_ +#define RAPIDJSON_INTERNAL_STACK_H_ + +namespace rapidjson { +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// Stack + +//! A type-unsafe stack for storing different types of data. +/*! \tparam Allocator Allocator for allocating stack memory. 
+*/ +template +class Stack { +public: + Stack(Allocator* allocator, size_t stack_capacity) : allocator_(allocator), own_allocator_(0), stack_(0), stack_top_(0), stack_end_(0), stack_capacity_(stack_capacity) { + RAPIDJSON_ASSERT(stack_capacity_ > 0); + if (!allocator_) + own_allocator_ = allocator_ = new Allocator(); + stack_top_ = stack_ = (char*)allocator_->Malloc(stack_capacity_); + stack_end_ = stack_ + stack_capacity_; + } + + ~Stack() { + Allocator::Free(stack_); + delete own_allocator_; // Only delete if it is owned by the stack + } + + void Clear() { /*stack_top_ = 0;*/ stack_top_ = stack_; } + + template + T* Push(size_t count = 1) { + // Expand the stack if needed + if (stack_top_ + sizeof(T) * count >= stack_end_) { + size_t new_capacity = stack_capacity_ * 2; + size_t size = GetSize(); + size_t new_size = GetSize() + sizeof(T) * count; + if (new_capacity < new_size) + new_capacity = new_size; + stack_ = (char*)allocator_->Realloc(stack_, stack_capacity_, new_capacity); + stack_capacity_ = new_capacity; + stack_top_ = stack_ + size; + stack_end_ = stack_ + stack_capacity_; + } + T* ret = (T*)stack_top_; + stack_top_ += sizeof(T) * count; + return ret; + } + + template + T* Pop(size_t count) { + RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T)); + stack_top_ -= count * sizeof(T); + return (T*)stack_top_; + } + + template + T* Top() { + RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); + return (T*)(stack_top_ - sizeof(T)); + } + + template + T* Bottom() { return (T*)stack_; } + + Allocator& GetAllocator() { return *allocator_; } + size_t GetSize() const { return stack_top_ - stack_; } + size_t GetCapacity() const { return stack_capacity_; } + +private: + Allocator* allocator_; + Allocator* own_allocator_; + char *stack_; + char *stack_top_; + char *stack_end_; + size_t stack_capacity_; +}; + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_STACK_H_ diff --git a/deps/rapidjson-0.11/internal/strfunc.h b/deps/rapidjson-0.11/internal/strfunc.h 
new file mode 100755 index 0000000..47b8ac0 --- /dev/null +++ b/deps/rapidjson-0.11/internal/strfunc.h @@ -0,0 +1,24 @@ +#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ +#define RAPIDJSON_INTERNAL_STRFUNC_H_ + +namespace rapidjson { +namespace internal { + +//! Custom strlen() which works on different character types. +/*! \tparam Ch Character type (e.g. char, wchar_t, short) + \param s Null-terminated input string. + \return Number of characters in the string. + \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. +*/ +template +inline SizeType StrLen(const Ch* s) { + const Ch* p = s; + while (*p != '\0') + ++p; + return SizeType(p - s); +} + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_INTERNAL_STRFUNC_H_ diff --git a/deps/rapidjson-0.11/prettywriter.h b/deps/rapidjson-0.11/prettywriter.h new file mode 100755 index 0000000..662b392 --- /dev/null +++ b/deps/rapidjson-0.11/prettywriter.h @@ -0,0 +1,156 @@ +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include "writer.h" + +namespace rapidjson { + +//! Writer with indentation and spacing. +/*! + \tparam Stream Type of ouptut stream. + \tparam Encoding Encoding of both source strings and output. + \tparam Allocator Type of allocator for allocating memory of stack. +*/ +template, typename Allocator = MemoryPoolAllocator<> > +class PrettyWriter : public Writer { +public: + typedef Writer Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param stream Output stream. + \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of + */ + PrettyWriter(Stream& stream, Allocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(stream, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {} + + //! Set custom indentation. + /*! \param indentChar Character for indentation. Must be whitespace character (' ', '\t', '\n', '\r'). 
+ \param indentCharCount Number of indent characters for each indentation level. + \note The default indentation is 4 spaces. + */ + PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) { + RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r'); + indentChar_ = indentChar; + indentCharCount_ = indentCharCount; + return *this; + } + + //@name Implementation of Handler. + //@{ + + PrettyWriter& Null() { PrettyPrefix(kNullType); Base::WriteNull(); return *this; } + PrettyWriter& Bool(bool b) { PrettyPrefix(b ? kTrueType : kFalseType); Base::WriteBool(b); return *this; } + PrettyWriter& Int(int i) { PrettyPrefix(kNumberType); Base::WriteInt(i); return *this; } + PrettyWriter& Uint(unsigned u) { PrettyPrefix(kNumberType); Base::WriteUint(u); return *this; } + PrettyWriter& Int64(int64_t i64) { PrettyPrefix(kNumberType); Base::WriteInt64(i64); return *this; } + PrettyWriter& Uint64(uint64_t u64) { PrettyPrefix(kNumberType); Base::WriteUint64(u64); return *this; } + PrettyWriter& Double(double d) { PrettyPrefix(kNumberType); Base::WriteDouble(d); return *this; } + + PrettyWriter& String(const Ch* str, SizeType length, bool copy = false) { + (void)copy; + PrettyPrefix(kStringType); + Base::WriteString(str, length); + return *this; + } + + PrettyWriter& StartObject() { + PrettyPrefix(kObjectType); + new (Base::level_stack_.template Push()) typename Base::Level(false); + Base::WriteStartObject(); + return *this; + } + + PrettyWriter& EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(!Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + Base::stream_.Put('\n'); + WriteIndent(); + } + Base::WriteEndObject(); + return *this; + } + + PrettyWriter& StartArray() { + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push()) 
typename Base::Level(true); + Base::WriteStartArray(); + return *this; + } + + PrettyWriter& EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + Base::stream_.Put('\n'); + WriteIndent(); + } + Base::WriteEndArray(); + return *this; + } + + //@} + + //! Simpler but slower overload. + PrettyWriter& String(const Ch* str) { return String(str, internal::StrLen(str)); } + +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::stream_.Put(','); // add comma if it is not the first element in array + Base::stream_.Put('\n'); + } + else + Base::stream_.Put('\n'); + WriteIndent(); + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::stream_.Put(','); + Base::stream_.Put('\n'); + } + else { + Base::stream_.Put(':'); + Base::stream_.Put(' '); + } + } + else + Base::stream_.Put('\n'); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else + RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType); + } + + void WriteIndent() { + size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; + PutN(Base::stream_, indentChar_, count); + } + + Ch indentChar_; + unsigned indentCharCount_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/deps/rapidjson-0.11/rapidjson.h b/deps/rapidjson-0.11/rapidjson.h new file mode 100755 index 0000000..357eab4 
--- /dev/null +++ b/deps/rapidjson-0.11/rapidjson.h @@ -0,0 +1,525 @@ +#ifndef RAPIDJSON_RAPIDJSON_H_ +#define RAPIDJSON_RAPIDJSON_H_ + +// Copyright (c) 2011-2012 Milo Yip (miloyip@gmail.com) +// Version 0.11 + +#include // malloc(), realloc(), free() +#include // memcpy() + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_INT64DEFINE + +// Here defines int64_t and uint64_t types in global namespace. +// If user have their own definition, can define RAPIDJSON_NO_INT64DEFINE to disable this. +#ifndef RAPIDJSON_NO_INT64DEFINE +#ifdef _MSC_VER +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif +#endif // RAPIDJSON_NO_INT64TYPEDEF + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ENDIAN +#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine +#define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine + +//! Endianness of the machine. +/*! GCC provided macro for detecting endianness of the target machine. But other + compilers may not have this. User can define RAPIDJSON_ENDIAN to either + RAPIDJSON_LITTLEENDIAN or RAPIDJSON_BIGENDIAN. +*/ +#ifndef RAPIDJSON_ENDIAN +#ifdef __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +#else +#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +#endif // __BYTE_ORDER__ +#else +#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN // Assumes little endian otherwise. +#endif +#endif // RAPIDJSON_ENDIAN + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD + +// Enable SSE2 optimization. +//#define RAPIDJSON_SSE2 + +// Enable SSE4.2 optimization. 
+//#define RAPIDJSON_SSE42 + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) +#define RAPIDJSON_SIMD +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_SIZETYPEDEFINE + +#ifndef RAPIDJSON_NO_SIZETYPEDEFINE +namespace rapidjson { +//! Use 32-bit array/string indices even for 64-bit platform, instead of using size_t. +/*! User may override the SizeType by defining RAPIDJSON_NO_SIZETYPEDEFINE. +*/ +typedef unsigned SizeType; +} // namespace rapidjson +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ASSERT + +//! Assertion. +/*! By default, rapidjson uses C assert() for assertion. + User can override it by defining RAPIDJSON_ASSERT(x) macro. +*/ +#ifndef RAPIDJSON_ASSERT +#include +#define RAPIDJSON_ASSERT(x) assert(x) +#endif // RAPIDJSON_ASSERT + +/////////////////////////////////////////////////////////////////////////////// +// Helpers + +#define RAPIDJSON_MULTILINEMACRO_BEGIN do { +#define RAPIDJSON_MULTILINEMACRO_END \ +} while((void)0, 0) + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// Allocator + +/*! \class rapidjson::Allocator + \brief Concept for allocating, resizing and freeing memory block. + + Note that Malloc() and Realloc() are non-static but Free() is static. + + So if an allocator need to support Free(), it needs to put its pointer in + the header of memory block. + +\code +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted. + // \param originalSize The current size in bytes. 
(Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// CrtAllocator + +//! C-runtime library allocator. +/*! This class is just wrapper for standard C library memory routines. + \implements Allocator +*/ +class CrtAllocator { +public: + static const bool kNeedFree = true; + void* Malloc(size_t size) { return malloc(size); } + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { (void)originalSize; return realloc(originalPtr, newSize); } + static void Free(void *ptr) { free(ptr); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// MemoryPoolAllocator + +//! Default memory allocator used by the parser and DOM. +/*! This allocator allocate memory blocks from pre-allocated memory chunks. + + It does not free memory blocks. And Realloc() only allocate new memory. + + The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. + + User may also supply a buffer as the first chunk. + + If the user-buffer is full then additional chunks are allocated by BaseAllocator. + + The user-buffer is not deallocated by this allocator. + + \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. + \implements Allocator +*/ +template +class MemoryPoolAllocator { +public: + static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator) + + //! Constructor with chunkSize. + /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. 
+ */ + MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = new BaseAllocator(); + AddChunk(chunk_capacity_); + } + + //! Constructor with user-supplied buffer. + /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size. + + The user buffer will not be deallocated when this allocator is destructed. + + \param buffer User supplied buffer. + \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). + \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + RAPIDJSON_ASSERT(buffer != 0); + RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); + chunkHead_ = (ChunkHeader*)buffer; + chunkHead_->capacity = size - sizeof(ChunkHeader); + chunkHead_->size = 0; + chunkHead_->next = 0; + } + + //! Destructor. + /*! This deallocates all memory chunks, excluding the user-supplied buffer. + */ + ~MemoryPoolAllocator() { + Clear(); + delete ownBaseAllocator_; + } + + //! Deallocates all memory chunks, excluding the user-supplied buffer. + void Clear() { + while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. 
+ */ + size_t Capacity() { + size_t capacity = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + capacity += c->capacity; + return capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + size = (size + 3) & ~3; // Force aligning size to 4 + + if (chunkHead_->size + size > chunkHead_->capacity) + AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size); + + char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size; + RAPIDJSON_ASSERT(((uintptr_t)buffer & 3) == 0); // returned buffer is aligned to 4 + chunkHead_->size += size; + + return buffer; + } + + //! Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) { + size_t increment = newSize - originalSize; + increment = (increment + 3) & ~3; // Force aligning size to 4 + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + RAPIDJSON_ASSERT(((uintptr_t)originalPtr & 3) == 0); // returned buffer is aligned to 4 + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + void* newBuffer = Malloc(newSize); + RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly. + return memcpy(newBuffer, originalPtr, originalSize); + } + + //! Frees a memory block (concept Allocator) + static void Free(void *) {} // Do nothing + +private: + //! Creates a new chunk. + /*! 
\param capacity Capacity of the chunk in bytes. + */ + void AddChunk(size_t capacity) { + ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity); + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. + + //! Chunk header for perpending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. + char *userBuffer_; //!< User supplied buffer. + BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. +}; + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. + + //! \brief Encode a Unicode codepoint to a buffer. + //! \param buffer pointer to destination buffer to store the result. It should have sufficient size of encoding one character. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + //! \returns the pointer to the next character after the encoded data. + static Ch* Encode(Ch *buffer, unsigned codepoint); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + \tparam CharType Type for storing 8-bit UTF-8 data. 
Default is char. + \implements Encoding +*/ +template +struct UTF8 { + typedef CharType Ch; + + static Ch* Encode(Ch *buffer, unsigned codepoint) { + if (codepoint <= 0x7F) + *buffer++ = codepoint & 0xFF; + else if (codepoint <= 0x7FF) { + *buffer++ = 0xC0 | ((codepoint >> 6) & 0xFF); + *buffer++ = 0x80 | ((codepoint & 0x3F)); + } + else if (codepoint <= 0xFFFF) { + *buffer++ = 0xE0 | ((codepoint >> 12) & 0xFF); + *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F); + *buffer++ = 0x80 | (codepoint & 0x3F); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + *buffer++ = 0xF0 | ((codepoint >> 18) & 0xFF); + *buffer++ = 0x80 | ((codepoint >> 12) & 0x3F); + *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F); + *buffer++ = 0x80 | (codepoint & 0x3F); + } + return buffer; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \implements Encoding +*/ +template +struct UTF16 { + typedef CharType Ch; + + static Ch* Encode(Ch* buffer, unsigned codepoint) { + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + *buffer++ = static_cast(codepoint); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + *buffer++ = static_cast((v >> 10) + 0xD800); + *buffer++ = (v & 0x3FF) + 0xDC00; + } + return buffer; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam Ch Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. 
+    \implements Encoding
+*/
+template<typename CharType = unsigned>
+struct UTF32 {
+    typedef CharType Ch;
+
+    static Ch *Encode(Ch* buffer, unsigned codepoint) {
+        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+        *buffer++ = codepoint;
+        return buffer;
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Stream
+
+/*! \class rapidjson::Stream
+    \brief Concept for reading and writing characters.
+
+    For read-only stream, no need to implement PutBegin(), Put() and PutEnd().
+
+    For write-only stream, only need to implement Put().
+
+\code
+concept Stream {
+    typename Ch;    //!< Character type of the stream.
+
+    //! Read the current character from stream without moving the read cursor.
+    Ch Peek() const;
+
+    //! Read the current character from stream and moving the read cursor to next character.
+    Ch Take();
+
+    //! Get the current read cursor.
+    //! \return Number of characters read from start.
+    size_t Tell();
+
+    //! Begin writing operation at the current read pointer.
+    //! \return The begin writer pointer.
+    Ch* PutBegin();
+
+    //! Write a character.
+    void Put(Ch c);
+
+    //! End the writing operation.
+    //! \param begin The begin write pointer returned by PutBegin().
+    //! \return Number of characters written.
+    size_t PutEnd(Ch* begin);
+}
+\endcode
+*/
+
+//! Put N copies of a character to a stream.
+template<typename Stream, typename Ch>
+inline void PutN(Stream& stream, Ch c, size_t n) {
+    for (size_t i = 0; i < n; i++)
+        stream.Put(c);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// StringStream
+
+//! Read-only string stream.
+/*! 
\implements Stream
+*/
+template <typename Encoding>
+struct GenericStringStream {
+    typedef typename Encoding::Ch Ch;
+
+    GenericStringStream(const Ch *src) : src_(src), head_(src) {}
+
+    Ch Peek() const { return *src_; }
+    Ch Take() { return *src_++; }
+    size_t Tell() const { return src_ - head_; }
+
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    const Ch* src_;     //!< Current read position.
+    const Ch* head_;    //!< Original head of the string.
+};
+
+typedef GenericStringStream<UTF8<> > StringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// InsituStringStream
+
+//! A read-write string stream.
+/*! This string stream is particularly designed for in-situ parsing.
+    \implements Stream
+*/
+template <typename Encoding>
+struct GenericInsituStringStream {
+    typedef typename Encoding::Ch Ch;
+
+    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
+
+    // Read
+    Ch Peek() { return *src_; }
+    Ch Take() { return *src_++; }
+    size_t Tell() { return src_ - head_; }
+
+    // Write
+    Ch* PutBegin() { return dst_ = src_; }
+    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
+    size_t PutEnd(Ch* begin) { return dst_ - begin; }
+
+    Ch* src_;
+    Ch* dst_;
+    Ch* head_;
+};
+
+typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// Type
+
+//! 
Type of JSON value
+enum Type {
+    kNullType = 0,      //!< null
+    kFalseType = 1,     //!< false
+    kTrueType = 2,      //!< true
+    kObjectType = 3,    //!< object
+    kArrayType = 4,     //!< array
+    kStringType = 5,    //!< string
+    kNumberType = 6     //!< number
+};
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/deps/rapidjson-0.11/reader.h b/deps/rapidjson-0.11/reader.h
new file mode 100755
index 0000000..59761c1
--- /dev/null
+++ b/deps/rapidjson-0.11/reader.h
@@ -0,0 +1,683 @@
+#ifndef RAPIDJSON_READER_H_
+#define RAPIDJSON_READER_H_
+
+// Copyright (c) 2011 Milo Yip (miloyip@gmail.com)
+// Version 0.1
+
+#include "rapidjson.h"
+#include "internal/pow10.h"
+#include "internal/stack.h"
+#include <csetjmp>
+
+#ifdef RAPIDJSON_SSE42
+#include <nmmintrin.h>
+#elif defined(RAPIDJSON_SSE2)
+#include <emmintrin.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#ifndef RAPIDJSON_PARSE_ERROR
+#define RAPIDJSON_PARSE_ERROR(msg, offset) \
+    RAPIDJSON_MULTILINEMACRO_BEGIN \
+    parseError_ = msg; \
+    errorOffset_ = offset; \
+    longjmp(jmpbuf_, 1); \
+    RAPIDJSON_MULTILINEMACRO_END
+#endif
+
+namespace rapidjson {
+
+///////////////////////////////////////////////////////////////////////////////
+// ParseFlag
+
+//! Combination of parseFlags
+enum ParseFlag {
+    kParseDefaultFlags = 0,     //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
+    kParseInsituFlag = 1        //!< In-situ(destructive) parsing.
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Handler
+
+/*! \class rapidjson::Handler
+    \brief Concept for receiving events from GenericReader upon parsing.
+\code
+concept Handler {
+    typename Ch;
+
+    void Null();
+    void Bool(bool b);
+    void Int(int i);
+    void Uint(unsigned i);
+    void Int64(int64_t i);
+    void Uint64(uint64_t i);
+    void Double(double d);
+    void String(const Ch* str, SizeType length, bool copy);
+    void StartObject();
+    void EndObject(SizeType memberCount);
+    void StartArray();
+    void EndArray(SizeType elementCount);
+};
+\endcode
+*/
+///////////////////////////////////////////////////////////////////////////////
+// BaseReaderHandler
+
+//! Default implementation of Handler.
+/*! This can be used as base class of any reader handler.
+    \implements Handler
+*/
+template<typename Encoding = UTF8<> >
+struct BaseReaderHandler {
+    typedef typename Encoding::Ch Ch;
+
+    void Default() {}
+    void Null() { Default(); }
+    void Bool(bool) { Default(); }
+    void Int(int) { Default(); }
+    void Uint(unsigned) { Default(); }
+    void Int64(int64_t) { Default(); }
+    void Uint64(uint64_t) { Default(); }
+    void Double(double) { Default(); }
+    void String(const Ch*, SizeType, bool) { Default(); }
+    void StartObject() { Default(); }
+    void EndObject(SizeType) { Default(); }
+    void StartArray() { Default(); }
+    void EndArray(SizeType) { Default(); }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// SkipWhitespace
+
+//! Skip the JSON white spaces in a stream.
+/*! \param stream A input stream for skipping white spaces.
+    \note This function has SSE2/SSE4.2 specialization.
+*/
+template<typename Stream>
+void SkipWhitespace(Stream& stream) {
+    Stream s = stream;  // Use a local copy for optimization
+    while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
+        s.Take();
+    stream = s;
+}
+
+#ifdef RAPIDJSON_SSE42
+//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    static const char whitespace[16] = " \n\r\t";
+    __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
+
+    for (;;) {
+        __m128i s = _mm_loadu_si128((const __m128i *)p);
+        unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
+        if (r == 0) // all 16 characters are whitespace
+            p += 16;
+        else {      // some of characters may be non-whitespace
+#ifdef _MSC_VER     // Find the index of first non-whitespace
+            unsigned long offset;
+            if (_BitScanForward(&offset, r))
+                return p + offset;
+#else
+            if (r != 0)
+                return p + __builtin_ffs(r) - 1;
+#endif
+        }
+    }
+}
+
+#elif defined(RAPIDJSON_SSE2)
+
+//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    static const char whitespaces[4][17] = {
+        "                ",
+        "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+        "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
+        "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
+
+    __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
+    __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
+    __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
+    __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
+
+    for (;;) {
+        __m128i s = _mm_loadu_si128((const __m128i *)p);
+        __m128i x = _mm_cmpeq_epi8(s, w0);
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+        unsigned short r = ~_mm_movemask_epi8(x);
+        if (r == 0) // all 16 characters are whitespace
+            p += 16;
+        else {      // some of characters may be non-whitespace
+#ifdef _MSC_VER     // Find the index of first non-whitespace
+            unsigned long offset;
+            if (_BitScanForward(&offset, r))
+                return p + offset;
+#else
+            if (r != 0)
+                return p + __builtin_ffs(r) - 1;
+#endif
+        }
+    }
+}
+
+#endif // RAPIDJSON_SSE2
+
+#ifdef RAPIDJSON_SIMD
+//! 
Template function specialization for InsituStringStream
+template<> inline void SkipWhitespace(InsituStringStream& stream) {
+    stream.src_ = const_cast<char*>(SkipWhitespace_SIMD(stream.src_));
+}
+
+//! Template function specialization for StringStream
+template<> inline void SkipWhitespace(StringStream& stream) {
+    stream.src_ = SkipWhitespace_SIMD(stream.src_);
+}
+#endif // RAPIDJSON_SIMD
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericReader
+
+//! SAX-style JSON parser. Use Reader for UTF8 encoding and default allocator.
+/*! GenericReader parses JSON text from a stream, and send events synchronously to an
+    object implementing Handler concept.
+
+    It needs to allocate a stack for storing a single decoded string during
+    non-destructive parsing.
+
+    For in-situ parsing, the decoded string is directly written to the source
+    text string, no temporary buffer is required.
+
+    A GenericReader object can be reused for parsing multiple JSON text.
+
+    \tparam Encoding Encoding of both the stream and the parse output.
+    \tparam Allocator Allocator type for stack.
+*/
+template <typename Encoding, typename Allocator = MemoryPoolAllocator<> >
+class GenericReader {
+public:
+    typedef typename Encoding::Ch Ch;
+
+    //! Constructor.
+    /*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
+        \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only use for non-destructive parsing)
+    */
+    GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseError_(0), errorOffset_(0) {}
+
+    //! Parse JSON text.
+    /*! \tparam parseFlags Combination of ParseFlag.
+         \tparam Stream Type of input stream.
+         \tparam Handler Type of handler which must implement Handler concept.
+         \param stream Input stream to be parsed.
+         \param handler The handler to receive events.
+         \return Whether the parsing is successful.
+    */
+    template <unsigned parseFlags, typename Stream, typename Handler>
+    bool Parse(Stream& stream, Handler& handler) {
+        parseError_ = 0;
+        errorOffset_ = 0;
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+#endif
+        if (setjmp(jmpbuf_)) {
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+            stack_.Clear();
+            return false;
+        }
+
+        SkipWhitespace(stream);
+
+        if (stream.Peek() == '\0')
+            RAPIDJSON_PARSE_ERROR("Text only contains white space(s)", stream.Tell());
+        else {
+            switch (stream.Peek()) {
+                case '{': ParseObject<parseFlags>(stream, handler); break;
+                case '[': ParseArray<parseFlags>(stream, handler); break;
+                default: RAPIDJSON_PARSE_ERROR("Expect either an object or array at root", stream.Tell());
+            }
+            SkipWhitespace(stream);
+
+            if (stream.Peek() != '\0')
+                RAPIDJSON_PARSE_ERROR("Nothing should follow the root object or array.", stream.Tell());
+        }
+
+        return true;
+    }
+
+    bool HasParseError() const { return parseError_ != 0; }
+    const char* GetParseError() const { return parseError_; }
+    size_t GetErrorOffset() const { return errorOffset_; }
+
+private:
+    // Parse object: { string : value, ... 
}
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseObject(Stream& stream, Handler& handler) {
+        RAPIDJSON_ASSERT(stream.Peek() == '{');
+        stream.Take();  // Skip '{'
+        handler.StartObject();
+        SkipWhitespace(stream);
+
+        if (stream.Peek() == '}') {
+            stream.Take();
+            handler.EndObject(0);   // empty object
+            return;
+        }
+
+        for (SizeType memberCount = 0;;) {
+            if (stream.Peek() != '"') {
+                RAPIDJSON_PARSE_ERROR("Name of an object member must be a string", stream.Tell());
+                break;
+            }
+
+            ParseString<parseFlags>(stream, handler);
+            SkipWhitespace(stream);
+
+            if (stream.Take() != ':') {
+                RAPIDJSON_PARSE_ERROR("There must be a colon after the name of object member", stream.Tell());
+                break;
+            }
+            SkipWhitespace(stream);
+
+            ParseValue<parseFlags>(stream, handler);
+            SkipWhitespace(stream);
+
+            ++memberCount;
+
+            switch(stream.Take()) {
+                case ',': SkipWhitespace(stream); break;
+                case '}': handler.EndObject(memberCount); return;
+                default:  RAPIDJSON_PARSE_ERROR("Must be a comma or '}' after an object member", stream.Tell());
+            }
+        }
+    }
+
+    // Parse array: [ value, ... 
]
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseArray(Stream& stream, Handler& handler) {
+        RAPIDJSON_ASSERT(stream.Peek() == '[');
+        stream.Take();  // Skip '['
+        handler.StartArray();
+        SkipWhitespace(stream);
+
+        if (stream.Peek() == ']') {
+            stream.Take();
+            handler.EndArray(0); // empty array
+            return;
+        }
+
+        for (SizeType elementCount = 0;;) {
+            ParseValue<parseFlags>(stream, handler);
+            ++elementCount;
+            SkipWhitespace(stream);
+
+            switch (stream.Take()) {
+                case ',': SkipWhitespace(stream); break;
+                case ']': handler.EndArray(elementCount); return;
+                default:  RAPIDJSON_PARSE_ERROR("Must be a comma or ']' after an array element.", stream.Tell());
+            }
+        }
+    }
+
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseNull(Stream& stream, Handler& handler) {
+        RAPIDJSON_ASSERT(stream.Peek() == 'n');
+        stream.Take();
+
+        if (stream.Take() == 'u' && stream.Take() == 'l' && stream.Take() == 'l')
+            handler.Null();
+        else
+            RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell() - 1);
+    }
+
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseTrue(Stream& stream, Handler& handler) {
+        RAPIDJSON_ASSERT(stream.Peek() == 't');
+        stream.Take();
+
+        if (stream.Take() == 'r' && stream.Take() == 'u' && stream.Take() == 'e')
+            handler.Bool(true);
+        else
+            RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell() - 1); // offset of the offending character, as in ParseNull()/ParseFalse()
+    }
+
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseFalse(Stream& stream, Handler& handler) {
+        RAPIDJSON_ASSERT(stream.Peek() == 'f');
+        stream.Take();
+
+        if (stream.Take() == 'a' && stream.Take() == 'l' && stream.Take() == 's' && stream.Take() == 'e')
+            handler.Bool(false);
+        else
+            RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell() - 1);
+    }
+
+    // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
+    template<typename Stream>
+    unsigned ParseHex4(Stream& stream) {
+        Stream s = stream;  // Use a local copy for optimization
+        unsigned codepoint = 0;
+        for (int i = 0; i < 4; i++) {
+            Ch c = s.Take();
+            codepoint <<= 4;
+            codepoint += c;
+            if (c >= '0' && c <= '9')
+                codepoint -= '0';
+            else if (c >= 'A' && c <= 'F')
+                codepoint -= 'A' - 10;
+            else if (c >= 'a' && c <= 'f')
+                codepoint -= 'a' - 10;
+            else
+                RAPIDJSON_PARSE_ERROR("Incorrect hex digit after \\u escape", s.Tell() - 1);
+        }
+        stream = s; // Restore stream
+        return codepoint;
+    }
+
+    // Parse string, handling the prefix and suffix double quotes and escaping.
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseString(Stream& stream, Handler& handler) {
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        static const Ch escape[256] = {
+            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
+            Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
+            0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
+            0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
+        };
+#undef Z16
+
+        Stream s = stream;  // Use a local copy for optimization
+        RAPIDJSON_ASSERT(s.Peek() == '\"');
+        s.Take();   // Skip '\"'
+        Ch *head;
+        SizeType len;
+        if (parseFlags & kParseInsituFlag)
+            head = s.PutBegin();
+        else
+            len = 0;
+
+#define RAPIDJSON_PUT(x) \
+    do { \
+        if (parseFlags & kParseInsituFlag) \
+            s.Put(x); \
+        else { \
+            *stack_.template Push<Ch>() = x; \
+            ++len; \
+        } \
+    } while(false)
+
+        for (;;) {
+            Ch c = s.Take();
+            if (c == '\\') {    // Escape
+                Ch e = s.Take();
+                if ((sizeof(Ch) == 1 || (unsigned)e < 256) && escape[(unsigned char)e]) // wide Ch: only index the table with values it covers
+                    RAPIDJSON_PUT(escape[(unsigned char)e]);
+                else if (e == 'u') {    // Unicode
+                    unsigned codepoint = ParseHex4(s);
+                    if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { // Handle UTF-16 surrogate pair
+                        if (s.Take() != '\\' || s.Take() != 'u') {
+                            RAPIDJSON_PARSE_ERROR("Missing the second \\u in surrogate pair", s.Tell() - 2);
+                            return;
+                        }
+                        unsigned codepoint2 = ParseHex4(s);
+                        if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF) {
+                            RAPIDJSON_PARSE_ERROR("The second \\u in surrogate pair is invalid", s.Tell() - 2);
+                            return;
+                        }
+                        codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+                    }
+
+                    Ch buffer[4];
+                    SizeType count = SizeType(Encoding::Encode(buffer, codepoint) - &buffer[0]);
+
+                    if (parseFlags & kParseInsituFlag)
+                        for (SizeType i = 0; i < count; i++)
+                            s.Put(buffer[i]);
+                    else {
+                        memcpy(stack_.template Push<Ch>(count), buffer, count * sizeof(Ch));
+                        len += count;
+                    }
+                }
+                else {
+                    RAPIDJSON_PARSE_ERROR("Unknown escape character", s.Tell() - 1); // s is the cursor that advanced; stream still points at the opening quote
+                    return;
+                }
+            }
+            else if (c == '"') {    // Closing double quote
+                if (parseFlags & kParseInsituFlag) {
+                    size_t length = s.PutEnd(head);
+                    RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
+                    RAPIDJSON_PUT('\0');    // null-terminate the string
+                    handler.String(head, SizeType(length), false);
+                }
+                else {
+                    RAPIDJSON_PUT('\0');
+                    handler.String(stack_.template Pop<Ch>(len), len - 1, true);
+                }
+                stream = s; // restore stream
+                return;
+            }
+            else if (c == '\0') {
+                RAPIDJSON_PARSE_ERROR("lacks ending quotation before the end of string", s.Tell() - 1);
+                return;
+            }
+            else if ((unsigned)c < 0x20) {  // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+                RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", s.Tell() - 1);
+                return;
+            }
+            else
+                RAPIDJSON_PUT(c);   // Normal character, just copy
+        }
+#undef RAPIDJSON_PUT
+    }
+
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseNumber(Stream& stream, Handler& handler) {
+        Stream s = stream; // Local copy for optimization
+        // Parse minus
+        bool minus = false;
+        if (s.Peek() == '-') {
+            minus = true;
+            s.Take();
+        }
+
+        // Parse int: zero / ( digit1-9 *DIGIT )
+        unsigned i;
+        bool try64bit = false;
+        if (s.Peek() == '0') {
+            i = 0;
+            s.Take();
+        }
+        else if (s.Peek() >= '1' && s.Peek() <= '9') {
+            i = s.Take() - '0';
+
+            if (minus)
+                while (s.Peek() >= '0' && s.Peek() <= '9') {
+                    if (i >= 214748364) { // 2^31 = 2147483648
+                        if (i != 214748364 || s.Peek() > '8') {
+                            try64bit = true;
+                            break;
+                        }
+                    }
+                    i = i * 10 + (s.Take() - '0');
+                }
+            else
+                while (s.Peek() >= '0' && s.Peek() <= '9') {
+                    if (i >= 429496729) { // 2^32 - 1 = 4294967295
+                        if (i != 429496729 || s.Peek() > '5') {
+                            try64bit = true;
+                            break;
+                        }
+                    }
+                    i = i * 10 + (s.Take() - '0');
+                }
+        }
+        else {
+            RAPIDJSON_PARSE_ERROR("Expect a value here.", s.Tell()); // report where parsing actually stopped (after any '-')
+            return;
+        }
+
+        // Parse 64bit int
+        uint64_t i64 = 0;
+        bool useDouble = false;
+        if (try64bit) {
+            i64 = i;
+            if (minus)
+                while (s.Peek() >= '0' && s.Peek() <= '9') {
+                    if (i64 >= 922337203685477580uLL) // 2^63 = 9223372036854775808
+                        if (i64 != 922337203685477580uLL || s.Peek() > '8') {
+                            useDouble = true;
+                            break;
+                        }
+                    i64 = i64 * 10 + (s.Take() - '0');
+                }
+            else
+                while (s.Peek() >= '0' && s.Peek() <= '9') {
+                    if (i64 >= 1844674407370955161uLL) // 2^64 - 1 = 18446744073709551615
+                        if (i64 != 1844674407370955161uLL || s.Peek() > '5') {
+                            useDouble = true;
+                            break;
+                        }
+                    i64 = i64 * 10 + (s.Take() - '0');
+                }
+        }
+
+        // Force double for big integer
+        double d = 0.0;
+        if (useDouble) {
+            d = (double)i64;
+            while (s.Peek() >= '0' && s.Peek() <= '9') {
+                if (d >= 1E307) {
+                    RAPIDJSON_PARSE_ERROR("Number too big to store in double", s.Tell());
+                    return;
+                }
+                d = d * 10 + (s.Take() - '0');
+            }
+        }
+
+        // Parse frac = decimal-point 1*DIGIT
+        int expFrac = 0;
+        if (s.Peek() == '.') {
+            if (!useDouble) {
+                d = try64bit ? (double)i64 : (double)i;
+                useDouble = true;
+            }
+            s.Take();
+
+            if (s.Peek() >= '0' && s.Peek() <= '9') {
+                d = d * 10 + (s.Take() - '0');
+                --expFrac;
+            }
+            else {
+                RAPIDJSON_PARSE_ERROR("At least one digit in fraction part", s.Tell());
+                return;
+            }
+
+            while (s.Peek() >= '0' && s.Peek() <= '9') {
+                if (expFrac > -16) {
+                    d = d * 10 + (s.Peek() - '0');
+                    --expFrac;
+                }
+                s.Take();
+            }
+        }
+
+        // Parse exp = e [ minus / plus ] 1*DIGIT
+        int exp = 0;
+        if (s.Peek() == 'e' || s.Peek() == 'E') {
+            if (!useDouble) {
+                d = try64bit ? (double)i64 : (double)i;
+                useDouble = true;
+            }
+            s.Take();
+
+            bool expMinus = false;
+            if (s.Peek() == '+')
+                s.Take();
+            else if (s.Peek() == '-') {
+                s.Take();
+                expMinus = true;
+            }
+
+            if (s.Peek() >= '0' && s.Peek() <= '9') {
+                exp = s.Take() - '0';
+                while (s.Peek() >= '0' && s.Peek() <= '9') {
+                    exp = exp * 10 + (s.Take() - '0');
+                    if (exp > 308) {
+                        RAPIDJSON_PARSE_ERROR("Number too big to store in double", s.Tell());
+                        return;
+                    }
+                }
+            }
+            else {
+                RAPIDJSON_PARSE_ERROR("At least one digit in exponent", s.Tell());
+                return;
+            }
+
+            if (expMinus)
+                exp = -exp;
+        }
+
+        // Finish parsing, call event according to the type of number.
+        if (useDouble) {
+            d *= internal::Pow10(exp + expFrac);
+            handler.Double(minus ? -d : d);
+        }
+        else {
+            if (try64bit) {
+                if (minus)
+                    handler.Int64((int64_t)(~i64 + 1)); // negate as unsigned: -(int64_t)i64 overflows when i64 == 2^63
+                else
+                    handler.Uint64(i64);
+            }
+            else {
+                if (minus)
+                    handler.Int((int)(~i + 1)); // negate as unsigned: -(int)i overflows when i == 2^31
+                else
+                    handler.Uint(i);
+            }
+        }
+
+        stream = s; // restore stream
+    }
+
+    // Parse any JSON value
+    template<unsigned parseFlags, typename Stream, typename Handler>
+    void ParseValue(Stream& stream, Handler& handler) {
+        switch (stream.Peek()) {
+            case 'n': ParseNull  <parseFlags>(stream, handler); break;
+            case 't': ParseTrue  <parseFlags>(stream, handler); break;
+            case 'f': ParseFalse <parseFlags>(stream, handler); break;
+            case '"': ParseString<parseFlags>(stream, handler); break;
+            case '{': ParseObject<parseFlags>(stream, handler); break;
+            case '[': ParseArray <parseFlags>(stream, handler); break;
+            default : ParseNumber<parseFlags>(stream, handler);
+        }
+    }
+
+    static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
+    internal::Stack<Allocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
+    jmp_buf jmpbuf_;                    //!< setjmp buffer for fast exit from nested parsing function calls.
+    const char* parseError_;
+    size_t errorOffset_;
+}; // class GenericReader
+
+//! Reader with UTF8 encoding and default allocator.
+typedef GenericReader<UTF8<> > Reader;
+
+} // namespace rapidjson
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // RAPIDJSON_READER_H_
diff --git a/deps/rapidjson-0.11/stringbuffer.h b/deps/rapidjson-0.11/stringbuffer.h
new file mode 100755
index 0000000..34cff58
--- /dev/null
+++ b/deps/rapidjson-0.11/stringbuffer.h
@@ -0,0 +1,49 @@
+#ifndef RAPIDJSON_STRINGBUFFER_H_
+#define RAPIDJSON_STRINGBUFFER_H_
+
+#include "rapidjson.h"
+#include "internal/stack.h"
+
+namespace rapidjson {
+
+//! Represents an in-memory output stream.
+/*!
+    \tparam Encoding Encoding of the stream.
+    \tparam Allocator type for allocating memory buffer.
+    \implements Stream
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+struct GenericStringBuffer {
+    typedef typename Encoding::Ch Ch;
+
+    GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+
+    void Put(Ch c) { *stack_.template Push<Ch>() = c; }
+
+    void Clear() { stack_.Clear(); }
+
+    const char* GetString() const {
+        // Push and pop a null terminator. This is safe.
+        *stack_.template Push<Ch>() = '\0';
+        stack_.template Pop<Ch>(1);
+
+        return stack_.template Bottom<Ch>();
+    }
+
+    size_t Size() const { return stack_.GetSize(); }
+
+    static const size_t kDefaultCapacity = 256;
+    mutable internal::Stack<Allocator> stack_;
+};
+
+typedef GenericStringBuffer<UTF8<> > StringBuffer;
+
+//! Implement specialized version of PutN() with memset() for better performance.
+template<>
+inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
+    memset(stream.stack_.Push<char>(n), c, n * sizeof(c));
+}
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_STRINGBUFFER_H_
diff --git a/deps/rapidjson-0.11/writer.h b/deps/rapidjson-0.11/writer.h
new file mode 100755
index 0000000..9d674b7
--- /dev/null
+++ b/deps/rapidjson-0.11/writer.h
@@ -0,0 +1,241 @@
+#ifndef RAPIDJSON_WRITER_H_
+#define RAPIDJSON_WRITER_H_
+
+#include "rapidjson.h"
+#include "internal/stack.h"
+#include "internal/strfunc.h"
+#include <cstdio>   // snprintf() or _sprintf_s()
+#include <new>      // placement new
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+namespace rapidjson {
+
+//! JSON writer
+/*! Writer implements the concept Handler.
+    It generates JSON text by events to an output stream.
+
+    User may programmatically calls the functions of a writer to generate JSON text.
+
+    On the other side, a writer can also be passed to objects that generates events,
+
+    for example Reader::Parse() and Document::Accept().
+
+    \tparam Stream Type of output stream.
+    \tparam Encoding Encoding of both source strings and output.
+    \implements Handler
+*/
+template<typename Stream, typename Encoding = UTF8<>, typename Allocator = MemoryPoolAllocator<> >
+class Writer {
+public:
+    typedef typename Encoding::Ch Ch;
+
+    Writer(Stream& stream, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
+        stream_(stream), level_stack_(allocator, levelDepth * sizeof(Level)) {}
+
+    //@name Implementation of Handler
+    //@{
+    Writer& Null()                  { Prefix(kNullType);   WriteNull();         return *this; }
+    Writer& Bool(bool b)            { Prefix(b ? kTrueType : kFalseType); WriteBool(b); return *this; }
+    Writer& Int(int i)              { Prefix(kNumberType); WriteInt(i);         return *this; }
+    Writer& Uint(unsigned u)        { Prefix(kNumberType); WriteUint(u);        return *this; }
+    Writer& Int64(int64_t i64)      { Prefix(kNumberType); WriteInt64(i64);     return *this; }
+    Writer& Uint64(uint64_t u64)    { Prefix(kNumberType); WriteUint64(u64);    return *this; }
+    Writer& Double(double d)        { Prefix(kNumberType); WriteDouble(d);      return *this; }
+
+    Writer& String(const Ch* str, SizeType length, bool copy = false) {
+        (void)copy;
+        Prefix(kStringType);
+        WriteString(str, length);
+        return *this;
+    }
+
+    Writer& StartObject() {
+        Prefix(kObjectType);
+        new (level_stack_.template Push<Level>()) Level(false);
+        WriteStartObject();
+        return *this;
+    }
+
+    Writer& EndObject(SizeType memberCount = 0) {
+        (void)memberCount;
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
+        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray);
+        level_stack_.template Pop<Level>(1);
+        WriteEndObject();
+        return *this;
+    }
+
+    Writer& StartArray() {
+        Prefix(kArrayType);
+        new (level_stack_.template Push<Level>()) Level(true);
+        WriteStartArray();
+        return *this;
+    }
+
+    Writer& EndArray(SizeType elementCount = 0) {
+        (void)elementCount;
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
+        RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray);
+        level_stack_.template Pop<Level>(1);
+        WriteEndArray();
+        return *this;
+    }
+    //@}
+
+    //! Simpler but slower overload.
+    Writer& String(const Ch* str) { return String(str, internal::StrLen(str)); }
+
+protected:
+    //! 
Information for each nested level
+    struct Level {
+        Level(bool inArray_) : inArray(inArray_), valueCount(0) {}
+        bool inArray;       //!< true if in array, otherwise in object
+        size_t valueCount;  //!< number of values in this level
+    };
+
+    static const size_t kDefaultLevelDepth = 32;
+
+    void WriteNull()  {
+        stream_.Put('n'); stream_.Put('u'); stream_.Put('l'); stream_.Put('l');
+    }
+
+    void WriteBool(bool b)  {
+        if (b) {
+            stream_.Put('t'); stream_.Put('r'); stream_.Put('u'); stream_.Put('e');
+        }
+        else {
+            stream_.Put('f'); stream_.Put('a'); stream_.Put('l'); stream_.Put('s'); stream_.Put('e');
+        }
+    }
+
+    void WriteInt(int i) {
+        if (i < 0)
+            stream_.Put('-');
+        // Negate in unsigned arithmetic: -i overflows (undefined behaviour) when i == INT_MIN.
+        const unsigned u = (i < 0) ? ~(unsigned)i + 1 : (unsigned)i;
+        WriteUint(u);
+    }
+
+    void WriteUint(unsigned u) {
+        char buffer[10];
+        char *p = buffer;
+        do {
+            *p++ = (u % 10) + '0';
+            u /= 10;
+        } while (u > 0);
+
+        do {
+            --p;
+            stream_.Put(*p);
+        } while (p != buffer);
+    }
+
+    void WriteInt64(int64_t i64) {
+        if (i64 < 0)
+            stream_.Put('-');
+        // Negate in unsigned arithmetic: -i64 overflows (undefined behaviour) when i64 == INT64_MIN.
+        const uint64_t u64 = (i64 < 0) ? ~(uint64_t)i64 + 1 : (uint64_t)i64;
+        WriteUint64(u64);
+    }
+
+    void WriteUint64(uint64_t u64) {
+        char buffer[20];
+        char *p = buffer;
+        do {
+            *p++ = char(u64 % 10) + '0';
+            u64 /= 10;
+        } while (u64 > 0);
+
+        do {
+            --p;
+            stream_.Put(*p);
+        } while (p != buffer);
+    }
+
+    //! \todo Optimization with custom double-to-string converter.
+    void WriteDouble(double d) {
+        char buffer[100];
+#if _MSC_VER
+        int ret = sprintf_s(buffer, sizeof(buffer), "%g", d);
+#else
+        int ret = snprintf(buffer, sizeof(buffer), "%g", d);
+#endif
+        RAPIDJSON_ASSERT(ret >= 1);
+        for (int i = 0; i < ret; i++)
+            stream_.Put(buffer[i]);
+    }
+
+    void WriteString(const Ch* str, SizeType length)  {
+        static const char hexDigits[] = "0123456789ABCDEF";
+        static const char escape[256] = {
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+            //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10
+              0,   0, '"',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 20
+            Z16, Z16,                                                                       // 30~4F
+              0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\\',   0,   0,   0, // 50
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16                                // 60~FF
+#undef Z16
+        };
+
+        stream_.Put('\"');
+        for (const Ch* p = str; p != str + length; ++p) {
+            if ((sizeof(Ch) == 1 || *p < 256) && escape[(unsigned char)*p])  {
+                stream_.Put('\\');
+                stream_.Put(escape[(unsigned char)*p]);
+                if (escape[(unsigned char)*p] == 'u') {
+                    stream_.Put('0');
+                    stream_.Put('0');
+                    stream_.Put(hexDigits[(*p) >> 4]);
+                    stream_.Put(hexDigits[(*p) & 0xF]);
+                }
+            }
+            else
+                stream_.Put(*p);
+        }
+        stream_.Put('\"');
+    }
+
+    void WriteStartObject() { stream_.Put('{'); }
+    void WriteEndObject()   { stream_.Put('}'); }
+    void WriteStartArray()  { stream_.Put('['); }
+    void WriteEndArray()    { stream_.Put(']'); }
+
+    void Prefix(Type type) {
+        (void)type;
+        if (level_stack_.GetSize() != 0) { // this value is not at root
+            Level* level = level_stack_.template Top<Level>();
+            if (level->valueCount > 0) {
+                if (level->inArray)
+                    stream_.Put(','); // add comma if it is not the first element in array
+                else  // in object
+                    stream_.Put((level->valueCount % 2 == 0) ? ',' : ':');
+            }
+            if (!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+            level->valueCount++;
+        }
+        else
+            RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType);
+    }
+
+    Stream& stream_;
+    internal::Stack<Allocator> level_stack_;
+
+private:
+    // Prohibit assignment for VC C4512 warning
+    Writer& operator=(const Writer& w);
+};
+
+} // namespace rapidjson
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // RAPIDJSON_WRITER_H_
diff --git a/deps/tclap-1.2.1/tclap/Arg.h b/deps/tclap-1.2.1/tclap/Arg.h
new file mode 100644
index 0000000..b28eef1
--- /dev/null
+++ b/deps/tclap-1.2.1/tclap/Arg.h
@@ -0,0 +1,692 @@
+// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*-
+
+/******************************************************************************
+ *
+ *  file:  Arg.h
+ *
+ *  Copyright (c) 2003, Michael E. Smoot .
+ *  Copyright (c) 2004, Michael E. Smoot, Daniel Aarno .
+ *  All rights reverved.
+ *
+ *  See the file COPYING in the top directory of this distribution for
+ *  more information.
+ *
+ *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ * + *****************************************************************************/ + + +#ifndef TCLAP_ARGUMENT_H +#define TCLAP_ARGUMENT_H + +#ifdef HAVE_CONFIG_H +#include +#else +#define HAVE_SSTREAM +#endif + +#include +#include +#include +#include +#include +#include + +#if defined(HAVE_SSTREAM) +#include +typedef std::istringstream istringstream; +#elif defined(HAVE_STRSTREAM) +#include +typedef std::istrstream istringstream; +#else +#error "Need a stringstream (sstream or strstream) to compile!" +#endif + +#include +#include +#include +#include +#include + +namespace TCLAP { + +/** + * A virtual base class that defines the essential data for all arguments. + * This class, or one of its existing children, must be subclassed to do + * anything. + */ +class Arg +{ + private: + /** + * Prevent accidental copying. + */ + Arg(const Arg& rhs); + + /** + * Prevent accidental copying. + */ + Arg& operator=(const Arg& rhs); + + /** + * Indicates whether the rest of the arguments should be ignored. + */ + static bool& ignoreRestRef() { static bool ign = false; return ign; } + + /** + * The delimiter that separates an argument flag/name from the + * value. + */ + static char& delimiterRef() { static char delim = ' '; return delim; } + + protected: + + /** + * The single char flag used to identify the argument. + * This value (preceded by a dash {-}), can be used to identify + * an argument on the command line. The _flag can be blank, + * in fact this is how unlabeled args work. Unlabeled args must + * override appropriate functions to get correct handling. Note + * that the _flag does NOT include the dash as part of the flag. + */ + std::string _flag; + + /** + * A single work namd indentifying the argument. + * This value (preceded by two dashed {--}) can also be used + * to identify an argument on the command line. Note that the + * _name does NOT include the two dashes as part of the _name. The + * _name cannot be blank. 
+ */ + std::string _name; + + /** + * Description of the argument. + */ + std::string _description; + + /** + * Indicating whether the argument is required. + */ + bool _required; + + /** + * Label to be used in usage description. Normally set to + * "required", but can be changed when necessary. + */ + std::string _requireLabel; + + /** + * Indicates whether a value is required for the argument. + * Note that the value may be required but the argument/value + * combination may not be, as specified by _required. + */ + bool _valueRequired; + + /** + * Indicates whether the argument has been set. + * Indicates that a value on the command line has matched the + * name/flag of this argument and the values have been set accordingly. + */ + bool _alreadySet; + + /** + * A pointer to a vistitor object. + * The visitor allows special handling to occur as soon as the + * argument is matched. This defaults to NULL and should not + * be used unless absolutely necessary. + */ + Visitor* _visitor; + + /** + * Whether this argument can be ignored, if desired. + */ + bool _ignoreable; + + /** + * Indicates that the arg was set as part of an XOR and not on the + * command line. + */ + bool _xorSet; + + bool _acceptsMultipleValues; + + /** + * Performs the special handling described by the Vistitor. + */ + void _checkWithVisitor() const; + + /** + * Primary constructor. YOU (yes you) should NEVER construct an Arg + * directly, this is a base class that is extended by various children + * that are meant to be used. Use SwitchArg, ValueArg, MultiArg, + * UnlabeledValueArg, or UnlabeledMultiArg instead. + * + * \param flag - The flag identifying the argument. + * \param name - The name identifying the argument. + * \param desc - The description of the argument, used in the usage. + * \param req - Whether the argument is required. + * \param valreq - Whether the a value is required for the argument. + * \param v - The visitor checked by the argument. Defaults to NULL. 
+ */ + Arg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + bool valreq, + Visitor* v = NULL ); + + public: + /** + * Destructor. + */ + virtual ~Arg(); + + /** + * Adds this to the specified list of Args. + * \param argList - The list to add this to. + */ + virtual void addToList( std::list& argList ) const; + + /** + * Begin ignoring arguments since the "--" argument was specified. + */ + static void beginIgnoring() { ignoreRestRef() = true; } + + /** + * Whether to ignore the rest. + */ + static bool ignoreRest() { return ignoreRestRef(); } + + /** + * The delimiter that separates an argument flag/name from the + * value. + */ + static char delimiter() { return delimiterRef(); } + + /** + * The char used as a place holder when SwitchArgs are combined. + * Currently set to the bell char (ASCII 7). + */ + static char blankChar() { return (char)7; } + + /** + * The char that indicates the beginning of a flag. Defaults to '-', but + * clients can define TCLAP_FLAGSTARTCHAR to override. + */ +#ifndef TCLAP_FLAGSTARTCHAR +#define TCLAP_FLAGSTARTCHAR '-' +#endif + static char flagStartChar() { return TCLAP_FLAGSTARTCHAR; } + + /** + * The sting that indicates the beginning of a flag. Defaults to "-", but + * clients can define TCLAP_FLAGSTARTSTRING to override. Should be the same + * as TCLAP_FLAGSTARTCHAR. + */ +#ifndef TCLAP_FLAGSTARTSTRING +#define TCLAP_FLAGSTARTSTRING "-" +#endif + static const std::string flagStartString() { return TCLAP_FLAGSTARTSTRING; } + + /** + * The sting that indicates the beginning of a name. Defaults to "--", but + * clients can define TCLAP_NAMESTARTSTRING to override. + */ +#ifndef TCLAP_NAMESTARTSTRING +#define TCLAP_NAMESTARTSTRING "--" +#endif + static const std::string nameStartString() { return TCLAP_NAMESTARTSTRING; } + + /** + * The name used to identify the ignore rest argument. 
+ */ + static const std::string ignoreNameString() { return "ignore_rest"; } + + /** + * Sets the delimiter for all arguments. + * \param c - The character that delimits flags/names from values. + */ + static void setDelimiter( char c ) { delimiterRef() = c; } + + /** + * Pure virtual method meant to handle the parsing and value assignment + * of the string on the command line. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. What is + * passed in from main. + */ + virtual bool processArg(int *i, std::vector& args) = 0; + + /** + * Operator ==. + * Equality operator. Must be virtual to handle unlabeled args. + * \param a - The Arg to be compared to this. + */ + virtual bool operator==(const Arg& a) const; + + /** + * Returns the argument flag. + */ + const std::string& getFlag() const; + + /** + * Returns the argument name. + */ + const std::string& getName() const; + + /** + * Returns the argument description. + */ + std::string getDescription() const; + + /** + * Indicates whether the argument is required. + */ + virtual bool isRequired() const; + + /** + * Sets _required to true. This is used by the XorHandler. + * You really have no reason to ever use it. + */ + void forceRequired(); + + /** + * Sets the _alreadySet value to true. This is used by the XorHandler. + * You really have no reason to ever use it. + */ + void xorSet(); + + /** + * Indicates whether a value must be specified for argument. + */ + bool isValueRequired() const; + + /** + * Indicates whether the argument has already been set. Only true + * if the arg has been matched on the command line. + */ + bool isSet() const; + + /** + * Indicates whether the argument can be ignored, if desired. + */ + bool isIgnoreable() const; + + /** + * A method that tests whether a string matches this argument. + * This is generally called by the processArg() method. 
This + * method could be re-implemented by a child to change how + * arguments are specified on the command line. + * \param s - The string to be compared to the flag/name to determine + * whether the arg matches. + */ + virtual bool argMatches( const std::string& s ) const; + + /** + * Returns a simple string representation of the argument. + * Primarily for debugging. + */ + virtual std::string toString() const; + + /** + * Returns a short ID for the usage. + * \param valueId - The value used in the id. + */ + virtual std::string shortID( const std::string& valueId = "val" ) const; + + /** + * Returns a long ID for the usage. + * \param valueId - The value used in the id. + */ + virtual std::string longID( const std::string& valueId = "val" ) const; + + /** + * Trims a value off of the flag. + * \param flag - The string from which the flag and value will be + * trimmed. Contains the flag once the value has been trimmed. + * \param value - Where the value trimmed from the string will + * be stored. + */ + virtual void trimFlag( std::string& flag, std::string& value ) const; + + /** + * Checks whether a given string has blank chars, indicating that + * it is a combined SwitchArg. If so, return true, otherwise return + * false. + * \param s - string to be checked. + */ + bool _hasBlanks( const std::string& s ) const; + + /** + * Sets the requireLabel. Used by XorHandler. You shouldn't ever + * use this. + * \param s - Set the requireLabel to this value. + */ + void setRequireLabel( const std::string& s ); + + /** + * Used for MultiArgs and XorHandler to determine whether args + * can still be set. + */ + virtual bool allowMore(); + + /** + * Use by output classes to determine whether an Arg accepts + * multiple values. + */ + virtual bool acceptsMultipleValues(); + + /** + * Clears the Arg object and allows it to be reused by new + * command lines. + */ + virtual void reset(); +}; + +/** + * Typedef of an Arg list iterator. 
+ */ +typedef std::list::iterator ArgListIterator; + +/** + * Typedef of an Arg vector iterator. + */ +typedef std::vector::iterator ArgVectorIterator; + +/** + * Typedef of a Visitor list iterator. + */ +typedef std::list::iterator VisitorListIterator; + +/* + * Extract a value of type T from it's string representation contained + * in strVal. The ValueLike parameter used to select the correct + * specialization of ExtractValue depending on the value traits of T. + * ValueLike traits use operator>> to assign the value from strVal. + */ +template void +ExtractValue(T &destVal, const std::string& strVal, ValueLike vl) +{ + static_cast(vl); // Avoid warning about unused vl + std::istringstream is(strVal); + + int valuesRead = 0; + while ( is.good() ) { + if ( is.peek() != EOF ) +#ifdef TCLAP_SETBASE_ZERO + is >> std::setbase(0) >> destVal; +#else + is >> destVal; +#endif + else + break; + + valuesRead++; + } + + if ( is.fail() ) + throw( ArgParseException("Couldn't read argument value " + "from string '" + strVal + "'")); + + + if ( valuesRead > 1 ) + throw( ArgParseException("More than one valid value parsed from " + "string '" + strVal + "'")); + +} + +/* + * Extract a value of type T from it's string representation contained + * in strVal. The ValueLike parameter used to select the correct + * specialization of ExtractValue depending on the value traits of T. + * StringLike uses assignment (operator=) to assign from strVal. 
+ */ +template void +ExtractValue(T &destVal, const std::string& strVal, StringLike sl) +{ + static_cast(sl); // Avoid warning about unused sl + SetString(destVal, strVal); +} + +////////////////////////////////////////////////////////////////////// +//BEGIN Arg.cpp +////////////////////////////////////////////////////////////////////// + +inline Arg::Arg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + bool valreq, + Visitor* v) : + _flag(flag), + _name(name), + _description(desc), + _required(req), + _requireLabel("required"), + _valueRequired(valreq), + _alreadySet(false), + _visitor( v ), + _ignoreable(true), + _xorSet(false), + _acceptsMultipleValues(false) +{ + if ( _flag.length() > 1 ) + throw(SpecificationException( + "Argument flag can only be one character long", toString() ) ); + + if ( _name != ignoreNameString() && + ( _flag == Arg::flagStartString() || + _flag == Arg::nameStartString() || + _flag == " " ) ) + throw(SpecificationException("Argument flag cannot be either '" + + Arg::flagStartString() + "' or '" + + Arg::nameStartString() + "' or a space.", + toString() ) ); + + if ( ( _name.substr( 0, Arg::flagStartString().length() ) == Arg::flagStartString() ) || + ( _name.substr( 0, Arg::nameStartString().length() ) == Arg::nameStartString() ) || + ( _name.find( " ", 0 ) != std::string::npos ) ) + throw(SpecificationException("Argument name begin with either '" + + Arg::flagStartString() + "' or '" + + Arg::nameStartString() + "' or space.", + toString() ) ); + +} + +inline Arg::~Arg() { } + +inline std::string Arg::shortID( const std::string& valueId ) const +{ + std::string id = ""; + + if ( _flag != "" ) + id = Arg::flagStartString() + _flag; + else + id = Arg::nameStartString() + _name; + + if ( _valueRequired ) + id += std::string( 1, Arg::delimiter() ) + "<" + valueId + ">"; + + if ( !_required ) + id = "[" + id + "]"; + + return id; +} + +inline std::string Arg::longID( const std::string& valueId ) 
const +{ + std::string id = ""; + + if ( _flag != "" ) + { + id += Arg::flagStartString() + _flag; + + if ( _valueRequired ) + id += std::string( 1, Arg::delimiter() ) + "<" + valueId + ">"; + + id += ", "; + } + + id += Arg::nameStartString() + _name; + + if ( _valueRequired ) + id += std::string( 1, Arg::delimiter() ) + "<" + valueId + ">"; + + return id; + +} + +inline bool Arg::operator==(const Arg& a) const +{ + if ( ( _flag != "" && _flag == a._flag ) || _name == a._name) + return true; + else + return false; +} + +inline std::string Arg::getDescription() const +{ + std::string desc = ""; + if ( _required ) + desc = "(" + _requireLabel + ") "; + +// if ( _valueRequired ) +// desc += "(value required) "; + + desc += _description; + return desc; +} + +inline const std::string& Arg::getFlag() const { return _flag; } + +inline const std::string& Arg::getName() const { return _name; } + +inline bool Arg::isRequired() const { return _required; } + +inline bool Arg::isValueRequired() const { return _valueRequired; } + +inline bool Arg::isSet() const +{ + if ( _alreadySet && !_xorSet ) + return true; + else + return false; +} + +inline bool Arg::isIgnoreable() const { return _ignoreable; } + +inline void Arg::setRequireLabel( const std::string& s) +{ + _requireLabel = s; +} + +inline bool Arg::argMatches( const std::string& argFlag ) const +{ + if ( ( argFlag == Arg::flagStartString() + _flag && _flag != "" ) || + argFlag == Arg::nameStartString() + _name ) + return true; + else + return false; +} + +inline std::string Arg::toString() const +{ + std::string s = ""; + + if ( _flag != "" ) + s += Arg::flagStartString() + _flag + " "; + + s += "(" + Arg::nameStartString() + _name + ")"; + + return s; +} + +inline void Arg::_checkWithVisitor() const +{ + if ( _visitor != NULL ) + _visitor->visit(); +} + +/** + * Implementation of trimFlag. 
+ */ +inline void Arg::trimFlag(std::string& flag, std::string& value) const +{ + int stop = 0; + for ( int i = 0; static_cast(i) < flag.length(); i++ ) + if ( flag[i] == Arg::delimiter() ) + { + stop = i; + break; + } + + if ( stop > 1 ) + { + value = flag.substr(stop+1); + flag = flag.substr(0,stop); + } + +} + +/** + * Implementation of _hasBlanks. + */ +inline bool Arg::_hasBlanks( const std::string& s ) const +{ + for ( int i = 1; static_cast(i) < s.length(); i++ ) + if ( s[i] == Arg::blankChar() ) + return true; + + return false; +} + +inline void Arg::forceRequired() +{ + _required = true; +} + +inline void Arg::xorSet() +{ + _alreadySet = true; + _xorSet = true; +} + +/** + * Overridden by Args that need to added to the end of the list. + */ +inline void Arg::addToList( std::list& argList ) const +{ + argList.push_front( const_cast(this) ); +} + +inline bool Arg::allowMore() +{ + return false; +} + +inline bool Arg::acceptsMultipleValues() +{ + return _acceptsMultipleValues; +} + +inline void Arg::reset() +{ + _xorSet = false; + _alreadySet = false; +} + +////////////////////////////////////////////////////////////////////// +//END Arg.cpp +////////////////////////////////////////////////////////////////////// + +} //namespace TCLAP + +#endif + diff --git a/deps/tclap-1.2.1/tclap/ArgException.h b/deps/tclap-1.2.1/tclap/ArgException.h new file mode 100644 index 0000000..3411aa9 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/ArgException.h @@ -0,0 +1,200 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: ArgException.h + * + * Copyright (c) 2003, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. 
+ * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_ARG_EXCEPTION_H +#define TCLAP_ARG_EXCEPTION_H + +#include +#include + +namespace TCLAP { + +/** + * A simple class that defines and argument exception. Should be caught + * whenever a CmdLine is created and parsed. + */ +class ArgException : public std::exception +{ + public: + + /** + * Constructor. + * \param text - The text of the exception. + * \param id - The text identifying the argument source. + * \param td - Text describing the type of ArgException it is. + * of the exception. + */ + ArgException( const std::string& text = "undefined exception", + const std::string& id = "undefined", + const std::string& td = "Generic ArgException") + : std::exception(), + _errorText(text), + _argId( id ), + _typeDescription(td) + { } + + /** + * Destructor. + */ + virtual ~ArgException() throw() { } + + /** + * Returns the error text. + */ + std::string error() const { return ( _errorText ); } + + /** + * Returns the argument id. + */ + std::string argId() const + { + if ( _argId == "undefined" ) + return " "; + else + return ( "Argument: " + _argId ); + } + + /** + * Returns the arg id and error text. + */ + const char* what() const throw() + { + static std::string ex; + ex = _argId + " -- " + _errorText; + return ex.c_str(); + } + + /** + * Returns the type of the exception. Used to explain and distinguish + * between different child exceptions. 
+ */ + std::string typeDescription() const + { + return _typeDescription; + } + + + private: + + /** + * The text of the exception message. + */ + std::string _errorText; + + /** + * The argument related to this exception. + */ + std::string _argId; + + /** + * Describes the type of the exception. Used to distinguish + * between different child exceptions. + */ + std::string _typeDescription; + +}; + +/** + * Thrown from within the child Arg classes when it fails to properly + * parse the argument it has been passed. + */ +class ArgParseException : public ArgException +{ + public: + /** + * Constructor. + * \param text - The text of the exception. + * \param id - The text identifying the argument source + * of the exception. + */ + ArgParseException( const std::string& text = "undefined exception", + const std::string& id = "undefined" ) + : ArgException( text, + id, + std::string( "Exception found while parsing " ) + + std::string( "the value the Arg has been passed." )) + { } +}; + +/** + * Thrown from CmdLine when the arguments on the command line are not + * properly specified, e.g. too many arguments, required argument missing, etc. + */ +class CmdLineParseException : public ArgException +{ + public: + /** + * Constructor. + * \param text - The text of the exception. + * \param id - The text identifying the argument source + * of the exception. + */ + CmdLineParseException( const std::string& text = "undefined exception", + const std::string& id = "undefined" ) + : ArgException( text, + id, + std::string( "Exception found when the values ") + + std::string( "on the command line do not meet ") + + std::string( "the requirements of the defined ") + + std::string( "Args." )) + { } +}; + +/** + * Thrown from Arg and CmdLine when an Arg is improperly specified, e.g. + * same flag as another Arg, same name, etc. + */ +class SpecificationException : public ArgException +{ + public: + /** + * Constructor. + * \param text - The text of the exception. 
+ * \param id - The text identifying the argument source + * of the exception. + */ + SpecificationException( const std::string& text = "undefined exception", + const std::string& id = "undefined" ) + : ArgException( text, + id, + std::string("Exception found when an Arg object ")+ + std::string("is improperly defined by the ") + + std::string("developer." )) + { } + +}; + +class ExitException { +public: + ExitException(int estat) : _estat(estat) {} + + int getExitStatus() const { return _estat; } + +private: + int _estat; +}; + +} // namespace TCLAP + +#endif + diff --git a/deps/tclap-1.2.1/tclap/ArgTraits.h b/deps/tclap-1.2.1/tclap/ArgTraits.h new file mode 100644 index 0000000..0b2c18f --- /dev/null +++ b/deps/tclap-1.2.1/tclap/ArgTraits.h @@ -0,0 +1,87 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: ArgTraits.h + * + * Copyright (c) 2007, Daniel Aarno, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ + +// This is an internal tclap file, you should probably not have to +// include this directly + +#ifndef TCLAP_ARGTRAITS_H +#define TCLAP_ARGTRAITS_H + +namespace TCLAP { + +// We use two empty structs to get compile type specialization +// function to work + +/** + * A value like argument value type is a value that can be set using + * operator>>. This is the default value type. + */ +struct ValueLike { + typedef ValueLike ValueCategory; + virtual ~ValueLike() {} +}; + +/** + * A string like argument value type is a value that can be set using + * operator=(string). Usefull if the value type contains spaces which + * will be broken up into individual tokens by operator>>. + */ +struct StringLike { + virtual ~StringLike() {} +}; + +/** + * A class can inherit from this object to make it have string like + * traits. This is a compile time thing and does not add any overhead + * to the inherenting class. + */ +struct StringLikeTrait { + typedef StringLike ValueCategory; + virtual ~StringLikeTrait() {} +}; + +/** + * A class can inherit from this object to make it have value like + * traits. This is a compile time thing and does not add any overhead + * to the inherenting class. + */ +struct ValueLikeTrait { + typedef ValueLike ValueCategory; + virtual ~ValueLikeTrait() {} +}; + +/** + * Arg traits are used to get compile type specialization when parsing + * argument values. Using an ArgTraits you can specify the way that + * values gets assigned to any particular type during parsing. The two + * supported types are StringLike and ValueLike. 
+ */ +template +struct ArgTraits { + typedef typename T::ValueCategory ValueCategory; + virtual ~ArgTraits() {} + //typedef ValueLike ValueCategory; +}; + +#endif + +} // namespace diff --git a/deps/tclap-1.2.1/tclap/COPYING b/deps/tclap-1.2.1/tclap/COPYING new file mode 100644 index 0000000..987be0c --- /dev/null +++ b/deps/tclap-1.2.1/tclap/COPYING @@ -0,0 +1,25 @@ + + +Copyright (c) 2003 Michael E. Smoot + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + + diff --git a/deps/tclap-1.2.1/tclap/CmdLine.h b/deps/tclap-1.2.1/tclap/CmdLine.h new file mode 100644 index 0000000..0fec8d8 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/CmdLine.h @@ -0,0 +1,633 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: CmdLine.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. 
+ * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_CMDLINE_H +#define TCLAP_CMDLINE_H + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include // Needed for exit(), which isn't defined in some envs. + +namespace TCLAP { + +template void DelPtr(T ptr) +{ + delete ptr; +} + +template void ClearContainer(C &c) +{ + typedef typename C::value_type value_type; + std::for_each(c.begin(), c.end(), DelPtr); + c.clear(); +} + + +/** + * The base class that manages the command line definition and passes + * along the parsing to the appropriate Arg classes. + */ +class CmdLine : public CmdLineInterface +{ + protected: + + /** + * The list of arguments that will be tested against the + * command line. + */ + std::list _argList; + + /** + * The name of the program. Set to argv[0]. + */ + std::string _progName; + + /** + * A message used to describe the program. Used in the usage output. + */ + std::string _message; + + /** + * The version to be displayed with the --version switch. + */ + std::string _version; + + /** + * The number of arguments that are required to be present on + * the command line. This is set dynamically, based on the + * Args added to the CmdLine object. 
+ */ + int _numRequired; + + /** + * The character that is used to separate the argument flag/name + * from the value. Defaults to ' ' (space). + */ + char _delimiter; + + /** + * The handler that manages xoring lists of args. + */ + XorHandler _xorHandler; + + /** + * A list of Args to be explicitly deleted when the destructor + * is called. At the moment, this only includes the three default + * Args. + */ + std::list _argDeleteOnExitList; + + /** + * A list of Visitors to be explicitly deleted when the destructor + * is called. At the moment, these are the Vistors created for the + * default Args. + */ + std::list _visitorDeleteOnExitList; + + /** + * Object that handles all output for the CmdLine. + */ + CmdLineOutput* _output; + + /** + * Should CmdLine handle parsing exceptions internally? + */ + bool _handleExceptions; + + /** + * Throws an exception listing the missing args. + */ + void missingArgsException(); + + /** + * Checks whether a name/flag string matches entirely matches + * the Arg::blankChar. Used when multiple switches are combined + * into a single argument. + * \param s - The message to be used in the usage. + */ + bool _emptyCombined(const std::string& s); + + /** + * Perform a delete ptr; operation on ptr when this object is deleted. + */ + void deleteOnExit(Arg* ptr); + + /** + * Perform a delete ptr; operation on ptr when this object is deleted. + */ + void deleteOnExit(Visitor* ptr); + +private: + + /** + * Prevent accidental copying. + */ + CmdLine(const CmdLine& rhs); + CmdLine& operator=(const CmdLine& rhs); + + /** + * Encapsulates the code common to the constructors + * (which is all of it). + */ + void _constructor(); + + + /** + * Is set to true when a user sets the output object. We use this so + * that we don't delete objects that are created outside of this lib. + */ + bool _userSetOutput; + + /** + * Whether or not to automatically create help and version switches. 
+ */ + bool _helpAndVersion; + + public: + + /** + * Command line constructor. Defines how the arguments will be + * parsed. + * \param message - The message to be used in the usage + * output. + * \param delimiter - The character that is used to separate + * the argument flag/name from the value. Defaults to ' ' (space). + * \param version - The version number to be used in the + * --version switch. + * \param helpAndVersion - Whether or not to create the Help and + * Version switches. Defaults to true. + */ + CmdLine(const std::string& message, + const char delimiter = ' ', + const std::string& version = "none", + bool helpAndVersion = true); + + /** + * Deletes any resources allocated by a CmdLine object. + */ + virtual ~CmdLine(); + + /** + * Adds an argument to the list of arguments to be parsed. + * \param a - Argument to be added. + */ + void add( Arg& a ); + + /** + * An alternative add. Functionally identical. + * \param a - Argument to be added. + */ + void add( Arg* a ); + + /** + * Add two Args that will be xor'd. If this method is used, add does + * not need to be called. + * \param a - Argument to be added and xor'd. + * \param b - Argument to be added and xor'd. + */ + void xorAdd( Arg& a, Arg& b ); + + /** + * Add a list of Args that will be xor'd. If this method is used, + * add does not need to be called. + * \param xors - List of Args to be added and xor'd. + */ + void xorAdd( std::vector& xors ); + + /** + * Parses the command line. + * \param argc - Number of arguments. + * \param argv - Array of arguments. + */ + void parse(int argc, const char * const * argv); + + /** + * Parses the command line. + * \param args - A vector of strings representing the args. + * args[0] is still the program name. 
+ */ + void parse(std::vector& args); + + /** + * + */ + CmdLineOutput* getOutput(); + + /** + * + */ + void setOutput(CmdLineOutput* co); + + /** + * + */ + std::string& getVersion(); + + /** + * + */ + std::string& getProgramName(); + + /** + * + */ + std::list& getArgList(); + + /** + * + */ + XorHandler& getXorHandler(); + + /** + * + */ + char getDelimiter(); + + /** + * + */ + std::string& getMessage(); + + /** + * + */ + bool hasHelpAndVersion(); + + /** + * Disables or enables CmdLine's internal parsing exception handling. + * + * @param state Should CmdLine handle parsing exceptions internally? + */ + void setExceptionHandling(const bool state); + + /** + * Returns the current state of the internal exception handling. + * + * @retval true Parsing exceptions are handled internally. + * @retval false Parsing exceptions are propagated to the caller. + */ + bool getExceptionHandling() const; + + /** + * Allows the CmdLine object to be reused. + */ + void reset(); + +}; + + +/////////////////////////////////////////////////////////////////////////////// +//Begin CmdLine.cpp +/////////////////////////////////////////////////////////////////////////////// + +inline CmdLine::CmdLine(const std::string& m, + char delim, + const std::string& v, + bool help ) + : + _argList(std::list()), + _progName("not_set_yet"), + _message(m), + _version(v), + _numRequired(0), + _delimiter(delim), + _xorHandler(XorHandler()), + _argDeleteOnExitList(std::list()), + _visitorDeleteOnExitList(std::list()), + _output(0), + _handleExceptions(true), + _userSetOutput(false), + _helpAndVersion(help) +{ + _constructor(); +} + +inline CmdLine::~CmdLine() +{ + ClearContainer(_argDeleteOnExitList); + ClearContainer(_visitorDeleteOnExitList); + + if ( !_userSetOutput ) { + delete _output; + _output = 0; + } +} + +inline void CmdLine::_constructor() +{ + _output = new StdOutput; + + Arg::setDelimiter( _delimiter ); + + Visitor* v; + + if ( _helpAndVersion ) + { + v = new HelpVisitor( this, 
&_output ); + SwitchArg* help = new SwitchArg("h","help", + "Displays usage information and exits.", + false, v); + add( help ); + deleteOnExit(help); + deleteOnExit(v); + + v = new VersionVisitor( this, &_output ); + SwitchArg* vers = new SwitchArg("","version", + "Displays version information and exits.", + false, v); + add( vers ); + deleteOnExit(vers); + deleteOnExit(v); + } + + v = new IgnoreRestVisitor(); + SwitchArg* ignore = new SwitchArg(Arg::flagStartString(), + Arg::ignoreNameString(), + "Ignores the rest of the labeled arguments following this flag.", + false, v); + add( ignore ); + deleteOnExit(ignore); + deleteOnExit(v); +} + +inline void CmdLine::xorAdd( std::vector& ors ) +{ + _xorHandler.add( ors ); + + for (ArgVectorIterator it = ors.begin(); it != ors.end(); it++) + { + (*it)->forceRequired(); + (*it)->setRequireLabel( "OR required" ); + add( *it ); + } +} + +inline void CmdLine::xorAdd( Arg& a, Arg& b ) +{ + std::vector ors; + ors.push_back( &a ); + ors.push_back( &b ); + xorAdd( ors ); +} + +inline void CmdLine::add( Arg& a ) +{ + add( &a ); +} + +inline void CmdLine::add( Arg* a ) +{ + for( ArgListIterator it = _argList.begin(); it != _argList.end(); it++ ) + if ( *a == *(*it) ) + throw( SpecificationException( + "Argument with same flag/name already exists!", + a->longID() ) ); + + a->addToList( _argList ); + + if ( a->isRequired() ) + _numRequired++; +} + + +inline void CmdLine::parse(int argc, const char * const * argv) +{ + // this step is necessary so that we have easy access to + // mutable strings. 
+ std::vector args; + for (int i = 0; i < argc; i++) + args.push_back(argv[i]); + + parse(args); +} + +inline void CmdLine::parse(std::vector& args) +{ + bool shouldExit = false; + int estat = 0; + + try { + _progName = args.front(); + args.erase(args.begin()); + + int requiredCount = 0; + + for (int i = 0; static_cast(i) < args.size(); i++) + { + bool matched = false; + for (ArgListIterator it = _argList.begin(); + it != _argList.end(); it++) { + if ( (*it)->processArg( &i, args ) ) + { + requiredCount += _xorHandler.check( *it ); + matched = true; + break; + } + } + + // checks to see if the argument is an empty combined + // switch and if so, then we've actually matched it + if ( !matched && _emptyCombined( args[i] ) ) + matched = true; + + if ( !matched && !Arg::ignoreRest() ) + throw(CmdLineParseException("Couldn't find match " + "for argument", + args[i])); + } + + if ( requiredCount < _numRequired ) + missingArgsException(); + + if ( requiredCount > _numRequired ) + throw(CmdLineParseException("Too many arguments!")); + + } catch ( ArgException& e ) { + // If we're not handling the exceptions, rethrow. + if ( !_handleExceptions) { + throw; + } + + try { + _output->failure(*this,e); + } catch ( ExitException &ee ) { + estat = ee.getExitStatus(); + shouldExit = true; + } + } catch (ExitException &ee) { + // If we're not handling the exceptions, rethrow. 
+ if ( !_handleExceptions) { + throw; + } + + estat = ee.getExitStatus(); + shouldExit = true; + } + + if (shouldExit) + exit(estat); +} + +inline bool CmdLine::_emptyCombined(const std::string& s) +{ + if ( s.length() > 0 && s[0] != Arg::flagStartChar() ) + return false; + + for ( int i = 1; static_cast(i) < s.length(); i++ ) + if ( s[i] != Arg::blankChar() ) + return false; + + return true; +} + +inline void CmdLine::missingArgsException() +{ + int count = 0; + + std::string missingArgList; + for (ArgListIterator it = _argList.begin(); it != _argList.end(); it++) + { + if ( (*it)->isRequired() && !(*it)->isSet() ) + { + missingArgList += (*it)->getName(); + missingArgList += ", "; + count++; + } + } + missingArgList = missingArgList.substr(0,missingArgList.length()-2); + + std::string msg; + if ( count > 1 ) + msg = "Required arguments missing: "; + else + msg = "Required argument missing: "; + + msg += missingArgList; + + throw(CmdLineParseException(msg)); +} + +inline void CmdLine::deleteOnExit(Arg* ptr) +{ + _argDeleteOnExitList.push_back(ptr); +} + +inline void CmdLine::deleteOnExit(Visitor* ptr) +{ + _visitorDeleteOnExitList.push_back(ptr); +} + +inline CmdLineOutput* CmdLine::getOutput() +{ + return _output; +} + +inline void CmdLine::setOutput(CmdLineOutput* co) +{ + if ( !_userSetOutput ) + delete _output; + _userSetOutput = true; + _output = co; +} + +inline std::string& CmdLine::getVersion() +{ + return _version; +} + +inline std::string& CmdLine::getProgramName() +{ + return _progName; +} + +inline std::list& CmdLine::getArgList() +{ + return _argList; +} + +inline XorHandler& CmdLine::getXorHandler() +{ + return _xorHandler; +} + +inline char CmdLine::getDelimiter() +{ + return _delimiter; +} + +inline std::string& CmdLine::getMessage() +{ + return _message; +} + +inline bool CmdLine::hasHelpAndVersion() +{ + return _helpAndVersion; +} + +inline void CmdLine::setExceptionHandling(const bool state) +{ + _handleExceptions = state; +} + +inline bool 
CmdLine::getExceptionHandling() const +{ + return _handleExceptions; +} + +inline void CmdLine::reset() +{ + for( ArgListIterator it = _argList.begin(); it != _argList.end(); it++ ) + (*it)->reset(); + + _progName.clear(); +} + +/////////////////////////////////////////////////////////////////////////////// +//End CmdLine.cpp +/////////////////////////////////////////////////////////////////////////////// + + + +} //namespace TCLAP +#endif diff --git a/deps/tclap-1.2.1/tclap/CmdLineInterface.h b/deps/tclap-1.2.1/tclap/CmdLineInterface.h new file mode 100644 index 0000000..1b25e9b --- /dev/null +++ b/deps/tclap-1.2.1/tclap/CmdLineInterface.h @@ -0,0 +1,150 @@ + +/****************************************************************************** + * + * file: CmdLineInterface.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_COMMANDLINE_INTERFACE_H +#define TCLAP_COMMANDLINE_INTERFACE_H + +#include +#include +#include +#include +#include + + +namespace TCLAP { + +class Arg; +class CmdLineOutput; +class XorHandler; + +/** + * The base class that manages the command line definition and passes + * along the parsing to the appropriate Arg classes. 
+ */ +class CmdLineInterface +{ + public: + + /** + * Destructor + */ + virtual ~CmdLineInterface() {} + + /** + * Adds an argument to the list of arguments to be parsed. + * \param a - Argument to be added. + */ + virtual void add( Arg& a )=0; + + /** + * An alternative add. Functionally identical. + * \param a - Argument to be added. + */ + virtual void add( Arg* a )=0; + + /** + * Add two Args that will be xor'd. + * If this method is used, add does + * not need to be called. + * \param a - Argument to be added and xor'd. + * \param b - Argument to be added and xor'd. + */ + virtual void xorAdd( Arg& a, Arg& b )=0; + + /** + * Add a list of Args that will be xor'd. If this method is used, + * add does not need to be called. + * \param xors - List of Args to be added and xor'd. + */ + virtual void xorAdd( std::vector& xors )=0; + + /** + * Parses the command line. + * \param argc - Number of arguments. + * \param argv - Array of arguments. + */ + virtual void parse(int argc, const char * const * argv)=0; + + /** + * Parses the command line. + * \param args - A vector of strings representing the args. + * args[0] is still the program name. + */ + void parse(std::vector& args); + + /** + * Returns the CmdLineOutput object. + */ + virtual CmdLineOutput* getOutput()=0; + + /** + * \param co - CmdLineOutput object that we want to use instead. + */ + virtual void setOutput(CmdLineOutput* co)=0; + + /** + * Returns the version string. + */ + virtual std::string& getVersion()=0; + + /** + * Returns the program name string. + */ + virtual std::string& getProgramName()=0; + + /** + * Returns the argList. + */ + virtual std::list& getArgList()=0; + + /** + * Returns the XorHandler. + */ + virtual XorHandler& getXorHandler()=0; + + /** + * Returns the delimiter string. + */ + virtual char getDelimiter()=0; + + /** + * Returns the message string. 
+ */ + virtual std::string& getMessage()=0; + + /** + * Indicates whether or not the help and version switches were created + * automatically. + */ + virtual bool hasHelpAndVersion()=0; + + /** + * Resets the instance as if it had just been constructed so that the + * instance can be reused. + */ + virtual void reset()=0; +}; + +} //namespace + + +#endif diff --git a/deps/tclap-1.2.1/tclap/CmdLineOutput.h b/deps/tclap-1.2.1/tclap/CmdLineOutput.h new file mode 100644 index 0000000..71ee5a3 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/CmdLineOutput.h @@ -0,0 +1,74 @@ + + +/****************************************************************************** + * + * file: CmdLineOutput.h + * + * Copyright (c) 2004, Michael E. Smoot + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_CMDLINEOUTPUT_H +#define TCLAP_CMDLINEOUTPUT_H + +#include +#include +#include +#include +#include +#include + +namespace TCLAP { + +class CmdLineInterface; +class ArgException; + +/** + * The interface that any output object must implement. + */ +class CmdLineOutput +{ + + public: + + /** + * Virtual destructor. + */ + virtual ~CmdLineOutput() {} + + /** + * Generates some sort of output for the USAGE. + * \param c - The CmdLine object the output is generated for. 
+ */ + virtual void usage(CmdLineInterface& c)=0; + + /** + * Generates some sort of output for the version. + * \param c - The CmdLine object the output is generated for. + */ + virtual void version(CmdLineInterface& c)=0; + + /** + * Generates some sort of output for a failure. + * \param c - The CmdLine object the output is generated for. + * \param e - The ArgException that caused the failure. + */ + virtual void failure( CmdLineInterface& c, + ArgException& e )=0; + +}; + +} //namespace TCLAP +#endif diff --git a/deps/tclap-1.2.1/tclap/Constraint.h b/deps/tclap-1.2.1/tclap/Constraint.h new file mode 100644 index 0000000..a92acf9 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/Constraint.h @@ -0,0 +1,68 @@ + +/****************************************************************************** + * + * file: Constraint.h + * + * Copyright (c) 2005, Michael E. Smoot + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_CONSTRAINT_H +#define TCLAP_CONSTRAINT_H + +#include +#include +#include +#include +#include +#include + +namespace TCLAP { + +/** + * The interface that defines the interaction between the Arg and Constraint. + */ +template +class Constraint +{ + + public: + /** + * Returns a description of the Constraint. + */ + virtual std::string description() const =0; + + /** + * Returns the short ID for the Constraint. 
+ */ + virtual std::string shortID() const =0; + + /** + * The method used to verify that the value parsed from the command + * line meets the constraint. + * \param value - The value that will be checked. + */ + virtual bool check(const T& value) const =0; + + /** + * Destructor. + * Silences warnings about Constraint being a base class with virtual + * functions but without a virtual destructor. + */ + virtual ~Constraint() { ; } +}; + +} //namespace TCLAP +#endif diff --git a/deps/tclap-1.2.1/tclap/DocBookOutput.h b/deps/tclap-1.2.1/tclap/DocBookOutput.h new file mode 100644 index 0000000..a42ca27 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/DocBookOutput.h @@ -0,0 +1,299 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: DocBookOutput.h + * + * Copyright (c) 2004, Michael E. Smoot + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_DOCBOOKOUTPUT_H +#define TCLAP_DOCBOOKOUTPUT_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace TCLAP { + +/** + * A class that generates DocBook output for usage() method for the + * given CmdLine and its Args. + */ +class DocBookOutput : public CmdLineOutput +{ + + public: + + /** + * Prints the usage to stdout. 
Can be overridden to + * produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + */ + virtual void usage(CmdLineInterface& c); + + /** + * Prints the version to stdout. Can be overridden + * to produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + */ + virtual void version(CmdLineInterface& c); + + /** + * Prints (to stderr) an error message, short usage + * Can be overridden to produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + * \param e - The ArgException that caused the failure. + */ + virtual void failure(CmdLineInterface& c, + ArgException& e ); + + protected: + + /** + * Substitutes the char r for string x in string s. + * \param s - The string to operate on. + * \param r - The char to replace. + * \param x - What to replace r with. + */ + void substituteSpecialChars( std::string& s, char r, std::string& x ); + void removeChar( std::string& s, char r); + void basename( std::string& s ); + + void printShortArg(Arg* it); + void printLongArg(Arg* it); + + char theDelimiter; +}; + + +inline void DocBookOutput::version(CmdLineInterface& _cmd) +{ + std::cout << _cmd.getVersion() << std::endl; +} + +inline void DocBookOutput::usage(CmdLineInterface& _cmd ) +{ + std::list argList = _cmd.getArgList(); + std::string progName = _cmd.getProgramName(); + std::string xversion = _cmd.getVersion(); + theDelimiter = _cmd.getDelimiter(); + XorHandler xorHandler = _cmd.getXorHandler(); + std::vector< std::vector > xorList = xorHandler.getXorList(); + basename(progName); + + std::cout << "" << std::endl; + std::cout << "" << std::endl << std::endl; + + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "" << progName << "" << std::endl; + std::cout << "1" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "" << progName << "" << std::endl; + std::cout << "" << _cmd.getMessage() << "" 
<< std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << progName << "" << std::endl; + + // xor + for ( int i = 0; (unsigned int)i < xorList.size(); i++ ) + { + std::cout << "" << std::endl; + for ( ArgVectorIterator it = xorList[i].begin(); + it != xorList[i].end(); it++ ) + printShortArg((*it)); + + std::cout << "" << std::endl; + } + + // rest of args + for (ArgListIterator it = argList.begin(); it != argList.end(); it++) + if ( !xorHandler.contains( (*it) ) ) + printShortArg((*it)); + + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "Description" << std::endl; + std::cout << "" << std::endl; + std::cout << _cmd.getMessage() << std::endl; + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "Options" << std::endl; + + std::cout << "" << std::endl; + + for (ArgListIterator it = argList.begin(); it != argList.end(); it++) + printLongArg((*it)); + + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "Version" << std::endl; + std::cout << "" << std::endl; + std::cout << xversion << std::endl; + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + +} + +inline void DocBookOutput::failure( CmdLineInterface& _cmd, + ArgException& e ) +{ + static_cast(_cmd); // unused + std::cout << e.what() << std::endl; + throw ExitException(1); +} + +inline void DocBookOutput::substituteSpecialChars( std::string& s, + char r, + std::string& x ) +{ + size_t p; + while ( (p = s.find_first_of(r)) != std::string::npos ) + { + s.erase(p,1); + s.insert(p,x); + } +} + +inline void DocBookOutput::removeChar( std::string& s, char r) +{ + size_t p; + while ( (p = s.find_first_of(r)) != std::string::npos ) + { + s.erase(p,1); + } +} + +inline void DocBookOutput::basename( std::string& s ) +{ 
+ size_t p = s.find_last_of('/'); + if ( p != std::string::npos ) + { + s.erase(0, p + 1); + } +} + +inline void DocBookOutput::printShortArg(Arg* a) +{ + std::string lt = "<"; + std::string gt = ">"; + + std::string id = a->shortID(); + substituteSpecialChars(id,'<',lt); + substituteSpecialChars(id,'>',gt); + removeChar(id,'['); + removeChar(id,']'); + + std::string choice = "opt"; + if ( a->isRequired() ) + choice = "plain"; + + std::cout << "acceptsMultipleValues() ) + std::cout << " rep='repeat'"; + + + std::cout << '>'; + if ( !a->getFlag().empty() ) + std::cout << a->flagStartChar() << a->getFlag(); + else + std::cout << a->nameStartString() << a->getName(); + if ( a->isValueRequired() ) + { + std::string arg = a->shortID(); + removeChar(arg,'['); + removeChar(arg,']'); + removeChar(arg,'<'); + removeChar(arg,'>'); + arg.erase(0, arg.find_last_of(theDelimiter) + 1); + std::cout << theDelimiter; + std::cout << "" << arg << ""; + } + std::cout << "" << std::endl; + +} + +inline void DocBookOutput::printLongArg(Arg* a) +{ + std::string lt = "<"; + std::string gt = ">"; + + std::string desc = a->getDescription(); + substituteSpecialChars(desc,'<',lt); + substituteSpecialChars(desc,'>',gt); + + std::cout << "" << std::endl; + + if ( !a->getFlag().empty() ) + { + std::cout << "" << std::endl; + std::cout << "" << std::endl; + std::cout << "" << std::endl; + } + + std::cout << "" << std::endl; + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; + std::cout << "" << std::endl; + std::cout << desc << std::endl; + std::cout << "" << std::endl; + std::cout << "" << std::endl; + + std::cout << "" << std::endl; +} + +} //namespace TCLAP +#endif diff --git a/deps/tclap-1.2.1/tclap/HelpVisitor.h b/deps/tclap-1.2.1/tclap/HelpVisitor.h new file mode 100644 index 0000000..cc3bd07 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/HelpVisitor.h @@ -0,0 +1,76 @@ + 
+/****************************************************************************** + * + * file: HelpVisitor.h + * + * Copyright (c) 2003, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_HELP_VISITOR_H +#define TCLAP_HELP_VISITOR_H + +#include +#include +#include + +namespace TCLAP { + +/** + * A Visitor object that calls the usage method of the given CmdLineOutput + * object for the specified CmdLine object. + */ +class HelpVisitor: public Visitor +{ + private: + /** + * Prevent accidental copying. + */ + HelpVisitor(const HelpVisitor& rhs); + HelpVisitor& operator=(const HelpVisitor& rhs); + + protected: + + /** + * The CmdLine the output will be generated for. + */ + CmdLineInterface* _cmd; + + /** + * The output object. + */ + CmdLineOutput** _out; + + public: + + /** + * Constructor. + * \param cmd - The CmdLine the output will be generated for. + * \param out - The type of output. + */ + HelpVisitor(CmdLineInterface* cmd, CmdLineOutput** out) + : Visitor(), _cmd( cmd ), _out( out ) { } + + /** + * Calls the usage method of the CmdLineOutput for the + * specified CmdLine. 
+ */ + void visit() { (*_out)->usage(*_cmd); throw ExitException(0); } + +}; + +} + +#endif diff --git a/deps/tclap-1.2.1/tclap/IgnoreRestVisitor.h b/deps/tclap-1.2.1/tclap/IgnoreRestVisitor.h new file mode 100644 index 0000000..e328649 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/IgnoreRestVisitor.h @@ -0,0 +1,52 @@ + +/****************************************************************************** + * + * file: IgnoreRestVisitor.h + * + * Copyright (c) 2003, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_IGNORE_REST_VISITOR_H +#define TCLAP_IGNORE_REST_VISITOR_H + +#include +#include + +namespace TCLAP { + +/** + * A Vistor that tells the CmdLine to begin ignoring arguments after + * this one is parsed. + */ +class IgnoreRestVisitor: public Visitor +{ + public: + + /** + * Constructor. + */ + IgnoreRestVisitor() : Visitor() {} + + /** + * Sets Arg::_ignoreRest. + */ + void visit() { Arg::beginIgnoring(); } +}; + +} + +#endif diff --git a/deps/tclap-1.2.1/tclap/MultiArg.h b/deps/tclap-1.2.1/tclap/MultiArg.h new file mode 100644 index 0000000..34bb2d7 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/MultiArg.h @@ -0,0 +1,433 @@ +/****************************************************************************** + * + * file: MultiArg.h + * + * Copyright (c) 2003, Michael E. Smoot . 
+ * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_MULTIPLE_ARGUMENT_H +#define TCLAP_MULTIPLE_ARGUMENT_H + +#include +#include + +#include +#include + +namespace TCLAP { +/** + * An argument that allows multiple values of type T to be specified. Very + * similar to a ValueArg, except a vector of values will be returned + * instead of just one. + */ +template +class MultiArg : public Arg +{ +public: + typedef std::vector container_type; + typedef typename container_type::iterator iterator; + typedef typename container_type::const_iterator const_iterator; + +protected: + + /** + * The list of values parsed from the CmdLine. + */ + std::vector _values; + + /** + * The description of type T to be used in the usage. + */ + std::string _typeDesc; + + /** + * A list of constraint on this Arg. + */ + Constraint* _constraint; + + /** + * Extracts the value from the string. + * Attempts to parse string as type T, if this fails an exception + * is thrown. + * \param val - The string to be read. + */ + void _extractValue( const std::string& val ); + + /** + * Used by XorHandler to decide whether to keep parsing for this arg. + */ + bool _allowMore; + +public: + + /** + * Constructor. 
+ * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + Visitor* v = NULL); + + /** + * Constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param parser - A CmdLine parser object to add this Arg to + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + CmdLineInterface& parser, + Visitor* v = NULL ); + + /** + * Constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. 
Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + Visitor* v = NULL ); + + /** + * Constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param parser - A CmdLine parser object to add this Arg to + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + CmdLineInterface& parser, + Visitor* v = NULL ); + + /** + * Handles the processing of the argument. + * This re-implements the Arg version of this method to set the + * _value of the argument appropriately. It knows the difference + * between labeled and unlabeled. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. Passed from main(). + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Returns a vector of type T containing the values parsed from + * the command line. + */ + const std::vector& getValue(); + + /** + * Returns an iterator over the values parsed from the command + * line. 
+ */ + const_iterator begin() const { return _values.begin(); } + + /** + * Returns the end of the values parsed from the command + * line. + */ + const_iterator end() const { return _values.end(); } + + /** + * Returns the a short id string. Used in the usage. + * \param val - value to be used. + */ + virtual std::string shortID(const std::string& val="val") const; + + /** + * Returns the a long id string. Used in the usage. + * \param val - value to be used. + */ + virtual std::string longID(const std::string& val="val") const; + + /** + * Once we've matched the first value, then the arg is no longer + * required. + */ + virtual bool isRequired() const; + + virtual bool allowMore(); + + virtual void reset(); + +private: + /** + * Prevent accidental copying + */ + MultiArg(const MultiArg& rhs); + MultiArg& operator=(const MultiArg& rhs); + +}; + +template +MultiArg::MultiArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + Visitor* v) : + Arg( flag, name, desc, req, true, v ), + _values(std::vector()), + _typeDesc( typeDesc ), + _constraint( NULL ), + _allowMore(false) +{ + _acceptsMultipleValues = true; +} + +template +MultiArg::MultiArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + CmdLineInterface& parser, + Visitor* v) +: Arg( flag, name, desc, req, true, v ), + _values(std::vector()), + _typeDesc( typeDesc ), + _constraint( NULL ), + _allowMore(false) +{ + parser.add( this ); + _acceptsMultipleValues = true; +} + +/** + * + */ +template +MultiArg::MultiArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + Visitor* v) +: Arg( flag, name, desc, req, true, v ), + _values(std::vector()), + _typeDesc( constraint->shortID() ), + _constraint( constraint ), + _allowMore(false) +{ + _acceptsMultipleValues = true; +} + +template 
+MultiArg::MultiArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + CmdLineInterface& parser, + Visitor* v) +: Arg( flag, name, desc, req, true, v ), + _values(std::vector()), + _typeDesc( constraint->shortID() ), + _constraint( constraint ), + _allowMore(false) +{ + parser.add( this ); + _acceptsMultipleValues = true; +} + +template +const std::vector& MultiArg::getValue() { return _values; } + +template +bool MultiArg::processArg(int *i, std::vector& args) +{ + if ( _ignoreable && Arg::ignoreRest() ) + return false; + + if ( _hasBlanks( args[*i] ) ) + return false; + + std::string flag = args[*i]; + std::string value = ""; + + trimFlag( flag, value ); + + if ( argMatches( flag ) ) + { + if ( Arg::delimiter() != ' ' && value == "" ) + throw( ArgParseException( + "Couldn't find delimiter for this argument!", + toString() ) ); + + // always take the first one, regardless of start string + if ( value == "" ) + { + (*i)++; + if ( static_cast(*i) < args.size() ) + _extractValue( args[*i] ); + else + throw( ArgParseException("Missing a value for this argument!", + toString() ) ); + } + else + _extractValue( value ); + + /* + // continuing taking the args until we hit one with a start string + while ( (unsigned int)(*i)+1 < args.size() && + args[(*i)+1].find_first_of( Arg::flagStartString() ) != 0 && + args[(*i)+1].find_first_of( Arg::nameStartString() ) != 0 ) + _extractValue( args[++(*i)] ); + */ + + _alreadySet = true; + _checkWithVisitor(); + + return true; + } + else + return false; +} + +/** + * + */ +template +std::string MultiArg::shortID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return Arg::shortID(_typeDesc) + " ... 
"; +} + +/** + * + */ +template +std::string MultiArg::longID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return Arg::longID(_typeDesc) + " (accepted multiple times)"; +} + +/** + * Once we've matched the first value, then the arg is no longer + * required. + */ +template +bool MultiArg::isRequired() const +{ + if ( _required ) + { + if ( _values.size() > 1 ) + return false; + else + return true; + } + else + return false; + +} + +template +void MultiArg::_extractValue( const std::string& val ) +{ + try { + T tmp; + ExtractValue(tmp, val, typename ArgTraits::ValueCategory()); + _values.push_back(tmp); + } catch( ArgParseException &e) { + throw ArgParseException(e.error(), toString()); + } + + if ( _constraint != NULL ) + if ( ! _constraint->check( _values.back() ) ) + throw( CmdLineParseException( "Value '" + val + + "' does not meet constraint: " + + _constraint->description(), + toString() ) ); +} + +template +bool MultiArg::allowMore() +{ + bool am = _allowMore; + _allowMore = true; + return am; +} + +template +void MultiArg::reset() +{ + Arg::reset(); + _values.clear(); +} + +} // namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/MultiSwitchArg.h b/deps/tclap-1.2.1/tclap/MultiSwitchArg.h new file mode 100644 index 0000000..8820b64 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/MultiSwitchArg.h @@ -0,0 +1,216 @@ + +/****************************************************************************** +* +* file: MultiSwitchArg.h +* +* Copyright (c) 2003, Michael E. Smoot . +* Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. +* Copyright (c) 2005, Michael E. Smoot, Daniel Aarno, Erik Zeek. +* All rights reverved. +* +* See the file COPYING in the top directory of this distribution for +* more information. +* +* THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#ifndef TCLAP_MULTI_SWITCH_ARG_H +#define TCLAP_MULTI_SWITCH_ARG_H + +#include +#include + +#include + +namespace TCLAP { + +/** +* A multiple switch argument. If the switch is set on the command line, then +* the getValue method will return the number of times the switch appears. +*/ +class MultiSwitchArg : public SwitchArg +{ + protected: + + /** + * The value of the switch. + */ + int _value; + + /** + * Used to support the reset() method so that ValueArg can be + * reset to their constructed value. + */ + int _default; + + public: + + /** + * MultiSwitchArg constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param init - Optional. The initial/default value of this Arg. + * Defaults to 0. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiSwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + int init = 0, + Visitor* v = NULL); + + + /** + * MultiSwitchArg constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param parser - A CmdLine parser object to add this Arg to + * \param init - Optional. The initial/default value of this Arg. 
+ * Defaults to 0. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + MultiSwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + CmdLineInterface& parser, + int init = 0, + Visitor* v = NULL); + + + /** + * Handles the processing of the argument. + * This re-implements the SwitchArg version of this method to set the + * _value of the argument appropriately. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. Passed + * in from main(). + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Returns int, the number of times the switch has been set. + */ + int getValue(); + + /** + * Returns the shortID for this Arg. + */ + std::string shortID(const std::string& val) const; + + /** + * Returns the longID for this Arg. + */ + std::string longID(const std::string& val) const; + + void reset(); + +}; + +////////////////////////////////////////////////////////////////////// +//BEGIN MultiSwitchArg.cpp +////////////////////////////////////////////////////////////////////// +inline MultiSwitchArg::MultiSwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + int init, + Visitor* v ) +: SwitchArg(flag, name, desc, false, v), +_value( init ), +_default( init ) +{ } + +inline MultiSwitchArg::MultiSwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + CmdLineInterface& parser, + int init, + Visitor* v ) +: SwitchArg(flag, name, desc, false, v), +_value( init ), +_default( init ) +{ + parser.add( this ); +} + +inline int MultiSwitchArg::getValue() { return _value; } + +inline bool MultiSwitchArg::processArg(int *i, std::vector& args) +{ + if ( _ignoreable && Arg::ignoreRest() ) + return false; + + if ( argMatches( args[*i] )) + { + // so the isSet() method will work + _alreadySet = true; + + // Matched argument: increment value. 
+ ++_value; + + _checkWithVisitor(); + + return true; + } + else if ( combinedSwitchesMatch( args[*i] ) ) + { + // so the isSet() method will work + _alreadySet = true; + + // Matched argument: increment value. + ++_value; + + // Check for more in argument and increment value. + while ( combinedSwitchesMatch( args[*i] ) ) + ++_value; + + _checkWithVisitor(); + + return false; + } + else + return false; +} + +inline std::string +MultiSwitchArg::shortID(const std::string& val) const +{ + return Arg::shortID(val) + " ... "; +} + +inline std::string +MultiSwitchArg::longID(const std::string& val) const +{ + return Arg::longID(val) + " (accepted multiple times)"; +} + +inline void +MultiSwitchArg::reset() +{ + MultiSwitchArg::_value = MultiSwitchArg::_default; +} + +////////////////////////////////////////////////////////////////////// +//END MultiSwitchArg.cpp +////////////////////////////////////////////////////////////////////// + +} //namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/OptionalUnlabeledTracker.h b/deps/tclap-1.2.1/tclap/OptionalUnlabeledTracker.h new file mode 100644 index 0000000..8174c5f --- /dev/null +++ b/deps/tclap-1.2.1/tclap/OptionalUnlabeledTracker.h @@ -0,0 +1,62 @@ + + +/****************************************************************************** + * + * file: OptionalUnlabeledTracker.h + * + * Copyright (c) 2005, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_OPTIONAL_UNLABELED_TRACKER_H +#define TCLAP_OPTIONAL_UNLABELED_TRACKER_H + +#include + +namespace TCLAP { + +class OptionalUnlabeledTracker +{ + + public: + + static void check( bool req, const std::string& argName ); + + static void gotOptional() { alreadyOptionalRef() = true; } + + static bool& alreadyOptional() { return alreadyOptionalRef(); } + + private: + + static bool& alreadyOptionalRef() { static bool ct = false; return ct; } +}; + + +inline void OptionalUnlabeledTracker::check( bool req, const std::string& argName ) +{ + if ( OptionalUnlabeledTracker::alreadyOptional() ) + throw( SpecificationException( + "You can't specify ANY Unlabeled Arg following an optional Unlabeled Arg", + argName ) ); + + if ( !req ) + OptionalUnlabeledTracker::gotOptional(); +} + + +} // namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/StandardTraits.h b/deps/tclap-1.2.1/tclap/StandardTraits.h new file mode 100644 index 0000000..46d7f6f --- /dev/null +++ b/deps/tclap-1.2.1/tclap/StandardTraits.h @@ -0,0 +1,208 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: StandardTraits.h + * + * Copyright (c) 2007, Daniel Aarno, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +// This is an internal tclap file, you should probably not have to +// include this directly + +#ifndef TCLAP_STANDARD_TRAITS_H +#define TCLAP_STANDARD_TRAITS_H + +#ifdef HAVE_CONFIG_H +#include // To check for long long +#endif + +// If Microsoft has already typedef'd wchar_t as an unsigned +// short, then compiles will break because it's as if we're +// creating ArgTraits twice for unsigned short. Thus... +#ifdef _MSC_VER +#ifndef _NATIVE_WCHAR_T_DEFINED +#define TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS +#endif +#endif + +namespace TCLAP { + +// ====================================================================== +// Integer types +// ====================================================================== + +/** + * longs have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * ints have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * shorts have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * chars have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +#ifdef HAVE_LONG_LONG +/** + * long longs have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; +#endif + +// ====================================================================== +// Unsigned integer types +// ====================================================================== + +/** + * unsigned longs have value-like semantics. 
+ */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * unsigned ints have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * unsigned shorts have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * unsigned chars have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +// Microsoft implements size_t awkwardly. +#if defined(_MSC_VER) && defined(_M_X64) +/** + * size_ts have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; +#endif + + +#ifdef HAVE_LONG_LONG +/** + * unsigned long longs have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; +#endif + +// ====================================================================== +// Float types +// ====================================================================== + +/** + * floats have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +/** + * doubles have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + +// ====================================================================== +// Other types +// ====================================================================== + +/** + * bools have value-like semantics. + */ +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; + + +/** + * wchar_ts have value-like semantics. + */ +#ifndef TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS +template<> +struct ArgTraits { + typedef ValueLike ValueCategory; +}; +#endif + +/** + * Strings have string like argument traits. 
+ */ +template<> +struct ArgTraits { + typedef StringLike ValueCategory; +}; + +template +void SetString(T &dst, const std::string &src) +{ + dst = src; +} + +} // namespace + +#endif + diff --git a/deps/tclap-1.2.1/tclap/StdOutput.h b/deps/tclap-1.2.1/tclap/StdOutput.h new file mode 100644 index 0000000..35f7b99 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/StdOutput.h @@ -0,0 +1,298 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: StdOutput.h + * + * Copyright (c) 2004, Michael E. Smoot + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_STDCMDLINEOUTPUT_H +#define TCLAP_STDCMDLINEOUTPUT_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace TCLAP { + +/** + * A class that isolates any output from the CmdLine object so that it + * may be easily modified. + */ +class StdOutput : public CmdLineOutput +{ + + public: + + /** + * Prints the usage to stdout. Can be overridden to + * produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + */ + virtual void usage(CmdLineInterface& c); + + /** + * Prints the version to stdout. Can be overridden + * to produce alternative behavior. 
+ * \param c - The CmdLine object the output is generated for. + */ + virtual void version(CmdLineInterface& c); + + /** + * Prints (to stderr) an error message, short usage + * Can be overridden to produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + * \param e - The ArgException that caused the failure. + */ + virtual void failure(CmdLineInterface& c, + ArgException& e ); + + protected: + + /** + * Writes a brief usage message with short args. + * \param c - The CmdLine object the output is generated for. + * \param os - The stream to write the message to. + */ + void _shortUsage( CmdLineInterface& c, std::ostream& os ) const; + + /** + * Writes a longer usage message with long and short args, + * provides descriptions and prints message. + * \param c - The CmdLine object the output is generated for. + * \param os - The stream to write the message to. + */ + void _longUsage( CmdLineInterface& c, std::ostream& os ) const; + + /** + * This function inserts line breaks and indents long strings + * according the params input. It will only break lines at spaces, + * commas and pipes. + * \param os - The stream to be printed to. + * \param s - The string to be printed. + * \param maxWidth - The maxWidth allowed for the output line. + * \param indentSpaces - The number of spaces to indent the first line. + * \param secondLineOffset - The number of spaces to indent the second + * and all subsequent lines in addition to indentSpaces. 
+ */ + void spacePrint( std::ostream& os, + const std::string& s, + int maxWidth, + int indentSpaces, + int secondLineOffset ) const; + +}; + + +inline void StdOutput::version(CmdLineInterface& _cmd) +{ + std::string progName = _cmd.getProgramName(); + std::string xversion = _cmd.getVersion(); + + std::cout << std::endl << progName << " version: " + << xversion << std::endl << std::endl; +} + +inline void StdOutput::usage(CmdLineInterface& _cmd ) +{ + std::cout << std::endl << "USAGE: " << std::endl << std::endl; + + _shortUsage( _cmd, std::cout ); + + std::cout << std::endl << std::endl << "Where: " << std::endl << std::endl; + + _longUsage( _cmd, std::cout ); + + std::cout << std::endl; + +} + +inline void StdOutput::failure( CmdLineInterface& _cmd, + ArgException& e ) +{ + std::string progName = _cmd.getProgramName(); + + std::cerr << "PARSE ERROR: " << e.argId() << std::endl + << " " << e.error() << std::endl << std::endl; + + if ( _cmd.hasHelpAndVersion() ) + { + std::cerr << "Brief USAGE: " << std::endl; + + _shortUsage( _cmd, std::cerr ); + + std::cerr << std::endl << "For complete USAGE and HELP type: " + << std::endl << " " << progName << " --help" + << std::endl << std::endl; + } + else + usage(_cmd); + + throw ExitException(1); +} + +inline void +StdOutput::_shortUsage( CmdLineInterface& _cmd, + std::ostream& os ) const +{ + std::list argList = _cmd.getArgList(); + std::string progName = _cmd.getProgramName(); + XorHandler xorHandler = _cmd.getXorHandler(); + std::vector< std::vector > xorList = xorHandler.getXorList(); + + std::string s = progName + " "; + + // first the xor + for ( int i = 0; static_cast(i) < xorList.size(); i++ ) + { + s += " {"; + for ( ArgVectorIterator it = xorList[i].begin(); + it != xorList[i].end(); it++ ) + s += (*it)->shortID() + "|"; + + s[s.length()-1] = '}'; + } + + // then the rest + for (ArgListIterator it = argList.begin(); it != argList.end(); it++) + if ( !xorHandler.contains( (*it) ) ) + s += " " + (*it)->shortID(); + 
+ // if the program name is too long, then adjust the second line offset + int secondLineOffset = static_cast(progName.length()) + 2; + if ( secondLineOffset > 75/2 ) + secondLineOffset = static_cast(75/2); + + spacePrint( os, s, 75, 3, secondLineOffset ); +} + +inline void +StdOutput::_longUsage( CmdLineInterface& _cmd, + std::ostream& os ) const +{ + std::list argList = _cmd.getArgList(); + std::string message = _cmd.getMessage(); + XorHandler xorHandler = _cmd.getXorHandler(); + std::vector< std::vector > xorList = xorHandler.getXorList(); + + // first the xor + for ( int i = 0; static_cast(i) < xorList.size(); i++ ) + { + for ( ArgVectorIterator it = xorList[i].begin(); + it != xorList[i].end(); + it++ ) + { + spacePrint( os, (*it)->longID(), 75, 3, 3 ); + spacePrint( os, (*it)->getDescription(), 75, 5, 0 ); + + if ( it+1 != xorList[i].end() ) + spacePrint(os, "-- OR --", 75, 9, 0); + } + os << std::endl << std::endl; + } + + // then the rest + for (ArgListIterator it = argList.begin(); it != argList.end(); it++) + if ( !xorHandler.contains( (*it) ) ) + { + spacePrint( os, (*it)->longID(), 75, 3, 3 ); + spacePrint( os, (*it)->getDescription(), 75, 5, 0 ); + os << std::endl; + } + + os << std::endl; + + spacePrint( os, message, 75, 3, 0 ); +} + +inline void StdOutput::spacePrint( std::ostream& os, + const std::string& s, + int maxWidth, + int indentSpaces, + int secondLineOffset ) const +{ + int len = static_cast(s.length()); + + if ( (len + indentSpaces > maxWidth) && maxWidth > 0 ) + { + int allowedLen = maxWidth - indentSpaces; + int start = 0; + while ( start < len ) + { + // find the substring length + // int stringLen = std::min( len - start, allowedLen ); + // doing it this way to support a VisualC++ 2005 bug + using namespace std; + int stringLen = min( len - start, allowedLen ); + + // trim the length so it doesn't end in middle of a word + if ( stringLen == allowedLen ) + while ( stringLen >= 0 && + s[stringLen+start] != ' ' && + s[stringLen+start] != 
',' && + s[stringLen+start] != '|' ) + stringLen--; + + // ok, the word is longer than the line, so just split + // wherever the line ends + if ( stringLen <= 0 ) + stringLen = allowedLen; + + // check for newlines + for ( int i = 0; i < stringLen; i++ ) + if ( s[start+i] == '\n' ) + stringLen = i+1; + + // print the indent + for ( int i = 0; i < indentSpaces; i++ ) + os << " "; + + if ( start == 0 ) + { + // handle second line offsets + indentSpaces += secondLineOffset; + + // adjust allowed len + allowedLen -= secondLineOffset; + } + + os << s.substr(start,stringLen) << std::endl; + + // so we don't start a line with a space + while ( s[stringLen+start] == ' ' && start < len ) + start++; + + start += stringLen; + } + } + else + { + for ( int i = 0; i < indentSpaces; i++ ) + os << " "; + os << s << std::endl; + } +} + +} //namespace TCLAP +#endif diff --git a/deps/tclap-1.2.1/tclap/SwitchArg.h b/deps/tclap-1.2.1/tclap/SwitchArg.h new file mode 100644 index 0000000..3916109 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/SwitchArg.h @@ -0,0 +1,266 @@ + +/****************************************************************************** + * + * file: SwitchArg.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ + + +#ifndef TCLAP_SWITCH_ARG_H +#define TCLAP_SWITCH_ARG_H + +#include +#include + +#include + +namespace TCLAP { + +/** + * A simple switch argument. If the switch is set on the command line, then + * the getValue method will return the opposite of the default value for the + * switch. + */ +class SwitchArg : public Arg +{ + protected: + + /** + * The value of the switch. + */ + bool _value; + + /** + * Used to support the reset() method so that ValueArg can be + * reset to their constructed value. + */ + bool _default; + + public: + + /** + * SwitchArg constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param def - The default value for this Switch. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + SwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool def = false, + Visitor* v = NULL); + + + /** + * SwitchArg constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param parser - A CmdLine parser object to add this Arg to + * \param def - The default value for this Switch. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + SwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + CmdLineInterface& parser, + bool def = false, + Visitor* v = NULL); + + + /** + * Handles the processing of the argument. 
+ * This re-implements the Arg version of this method to set the + * _value of the argument appropriately. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. Passed + * in from main(). + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Checks a string to see if any of the chars in the string + * match the flag for this Switch. + */ + bool combinedSwitchesMatch(std::string& combined); + + /** + * Returns bool, whether or not the switch has been set. + */ + bool getValue(); + + virtual void reset(); + + private: + /** + * Checks to see if we've found the last match in + * a combined string. + */ + bool lastCombined(std::string& combined); + + /** + * Does the common processing of processArg. + */ + void commonProcessing(); +}; + +////////////////////////////////////////////////////////////////////// +//BEGIN SwitchArg.cpp +////////////////////////////////////////////////////////////////////// +inline SwitchArg::SwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool default_val, + Visitor* v ) +: Arg(flag, name, desc, false, false, v), + _value( default_val ), + _default( default_val ) +{ } + +inline SwitchArg::SwitchArg(const std::string& flag, + const std::string& name, + const std::string& desc, + CmdLineInterface& parser, + bool default_val, + Visitor* v ) +: Arg(flag, name, desc, false, false, v), + _value( default_val ), + _default(default_val) +{ + parser.add( this ); +} + +inline bool SwitchArg::getValue() { return _value; } + +inline bool SwitchArg::lastCombined(std::string& combinedSwitches ) +{ + for ( unsigned int i = 1; i < combinedSwitches.length(); i++ ) + if ( combinedSwitches[i] != Arg::blankChar() ) + return false; + + return true; +} + +inline bool SwitchArg::combinedSwitchesMatch(std::string& combinedSwitches ) +{ + // make sure this is actually a combined switch + if ( combinedSwitches.length() > 0 && + combinedSwitches[0] != 
Arg::flagStartString()[0] ) + return false; + + // make sure it isn't a long name + if ( combinedSwitches.substr( 0, Arg::nameStartString().length() ) == + Arg::nameStartString() ) + return false; + + // make sure the delimiter isn't in the string + if ( combinedSwitches.find_first_of( Arg::delimiter() ) != std::string::npos ) + return false; + + // ok, we're not specifying a ValueArg, so we know that we have + // a combined switch list. + for ( unsigned int i = 1; i < combinedSwitches.length(); i++ ) + if ( _flag.length() > 0 && + combinedSwitches[i] == _flag[0] && + _flag[0] != Arg::flagStartString()[0] ) + { + // update the combined switches so this one is no longer present + // this is necessary so that no unlabeled args are matched + // later in the processing. + //combinedSwitches.erase(i,1); + combinedSwitches[i] = Arg::blankChar(); + return true; + } + + // none of the switches passed in the list match. + return false; +} + +inline void SwitchArg::commonProcessing() +{ + if ( _xorSet ) + throw(CmdLineParseException( + "Mutually exclusive argument already set!", toString())); + + if ( _alreadySet ) + throw(CmdLineParseException("Argument already set!", toString())); + + _alreadySet = true; + + if ( _value == true ) + _value = false; + else + _value = true; + + _checkWithVisitor(); +} + +inline bool SwitchArg::processArg(int *i, std::vector& args) +{ + if ( _ignoreable && Arg::ignoreRest() ) + return false; + + // if the whole string matches the flag or name string + if ( argMatches( args[*i] ) ) + { + commonProcessing(); + + return true; + } + // if a substring matches the flag as part of a combination + else if ( combinedSwitchesMatch( args[*i] ) ) + { + // check again to ensure we don't misinterpret + // this as a MultiSwitchArg + if ( combinedSwitchesMatch( args[*i] ) ) + throw(CmdLineParseException("Argument already set!", + toString())); + + commonProcessing(); + + // We only want to return true if we've found the last combined + // match in the string, 
otherwise we return true so that other + // switches in the combination will have a chance to match. + return lastCombined( args[*i] ); + } + else + return false; +} + +inline void SwitchArg::reset() +{ + Arg::reset(); + _value = _default; +} +////////////////////////////////////////////////////////////////////// +//End SwitchArg.cpp +////////////////////////////////////////////////////////////////////// + +} //namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/UnlabeledMultiArg.h b/deps/tclap-1.2.1/tclap/UnlabeledMultiArg.h new file mode 100644 index 0000000..d5e1781 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/UnlabeledMultiArg.h @@ -0,0 +1,301 @@ + +/****************************************************************************** + * + * file: UnlabeledMultiArg.h + * + * Copyright (c) 2003, Michael E. Smoot. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_MULTIPLE_UNLABELED_ARGUMENT_H +#define TCLAP_MULTIPLE_UNLABELED_ARGUMENT_H + +#include +#include + +#include +#include + +namespace TCLAP { + +/** + * Just like a MultiArg, except that the arguments are unlabeled. Basically, + * this Arg will slurp up everything that hasn't been matched to another + * Arg. 
+ */ +template +class UnlabeledMultiArg : public MultiArg +{ + + // If compiler has two stage name lookup (as gcc >= 3.4 does) + // this is requried to prevent undef. symbols + using MultiArg::_ignoreable; + using MultiArg::_hasBlanks; + using MultiArg::_extractValue; + using MultiArg::_typeDesc; + using MultiArg::_name; + using MultiArg::_description; + using MultiArg::_alreadySet; + using MultiArg::toString; + + public: + + /** + * Constructor. + * \param name - The name of the Arg. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param ignoreable - Whether or not this argument can be ignored + * using the "--" flag. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + UnlabeledMultiArg( const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + bool ignoreable = false, + Visitor* v = NULL ); + /** + * Constructor. + * \param name - The name of the Arg. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param parser - A CmdLine parser object to add this Arg to + * \param ignoreable - Whether or not this argument can be ignored + * using the "--" flag. + * \param v - An optional visitor. 
You probably should not + * use this unless you have a very good reason. + */ + UnlabeledMultiArg( const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + CmdLineInterface& parser, + bool ignoreable = false, + Visitor* v = NULL ); + + /** + * Constructor. + * \param name - The name of the Arg. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param ignoreable - Whether or not this argument can be ignored + * using the "--" flag. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + UnlabeledMultiArg( const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + bool ignoreable = false, + Visitor* v = NULL ); + + /** + * Constructor. + * \param name - The name of the Arg. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param parser - A CmdLine parser object to add this Arg to + * \param ignoreable - Whether or not this argument can be ignored + * using the "--" flag. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + UnlabeledMultiArg( const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + CmdLineInterface& parser, + bool ignoreable = false, + Visitor* v = NULL ); + + /** + * Handles the processing of the argument. + * This re-implements the Arg version of this method to set the + * _value of the argument appropriately. 
It knows the difference + * between labeled and unlabeled. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. Passed from main(). + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Returns the a short id string. Used in the usage. + * \param val - value to be used. + */ + virtual std::string shortID(const std::string& val="val") const; + + /** + * Returns the a long id string. Used in the usage. + * \param val - value to be used. + */ + virtual std::string longID(const std::string& val="val") const; + + /** + * Opertor ==. + * \param a - The Arg to be compared to this. + */ + virtual bool operator==(const Arg& a) const; + + /** + * Pushes this to back of list rather than front. + * \param argList - The list this should be added to. + */ + virtual void addToList( std::list& argList ) const; +}; + +template +UnlabeledMultiArg::UnlabeledMultiArg(const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + bool ignoreable, + Visitor* v) +: MultiArg("", name, desc, req, typeDesc, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(true, toString()); +} + +template +UnlabeledMultiArg::UnlabeledMultiArg(const std::string& name, + const std::string& desc, + bool req, + const std::string& typeDesc, + CmdLineInterface& parser, + bool ignoreable, + Visitor* v) +: MultiArg("", name, desc, req, typeDesc, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(true, toString()); + parser.add( this ); +} + + +template +UnlabeledMultiArg::UnlabeledMultiArg(const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, + bool ignoreable, + Visitor* v) +: MultiArg("", name, desc, req, constraint, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(true, toString()); +} + +template +UnlabeledMultiArg::UnlabeledMultiArg(const std::string& name, + const std::string& desc, + bool req, + Constraint* constraint, 
+ CmdLineInterface& parser, + bool ignoreable, + Visitor* v) +: MultiArg("", name, desc, req, constraint, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(true, toString()); + parser.add( this ); +} + + +template +bool UnlabeledMultiArg::processArg(int *i, std::vector& args) +{ + + if ( _hasBlanks( args[*i] ) ) + return false; + + // never ignore an unlabeled multi arg + + + // always take the first value, regardless of the start string + _extractValue( args[(*i)] ); + + /* + // continue taking args until we hit the end or a start string + while ( (unsigned int)(*i)+1 < args.size() && + args[(*i)+1].find_first_of( Arg::flagStartString() ) != 0 && + args[(*i)+1].find_first_of( Arg::nameStartString() ) != 0 ) + _extractValue( args[++(*i)] ); + */ + + _alreadySet = true; + + return true; +} + +template +std::string UnlabeledMultiArg::shortID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return std::string("<") + _typeDesc + "> ..."; +} + +template +std::string UnlabeledMultiArg::longID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return std::string("<") + _typeDesc + "> (accepted multiple times)"; +} + +template +bool UnlabeledMultiArg::operator==(const Arg& a) const +{ + if ( _name == a.getName() || _description == a.getDescription() ) + return true; + else + return false; +} + +template +void UnlabeledMultiArg::addToList( std::list& argList ) const +{ + argList.push_back( const_cast(static_cast(this)) ); +} + +} + +#endif diff --git a/deps/tclap-1.2.1/tclap/UnlabeledValueArg.h b/deps/tclap-1.2.1/tclap/UnlabeledValueArg.h new file mode 100644 index 0000000..5721d61 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/UnlabeledValueArg.h @@ -0,0 +1,340 @@ + +/****************************************************************************** + * + * file: UnlabeledValueArg.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. 
+ * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_UNLABELED_VALUE_ARGUMENT_H +#define TCLAP_UNLABELED_VALUE_ARGUMENT_H + +#include +#include + +#include +#include + + +namespace TCLAP { + +/** + * The basic unlabeled argument that parses a value. + * This is a template class, which means the type T defines the type + * that a given object will attempt to parse when an UnlabeledValueArg + * is reached in the list of args that the CmdLine iterates over. + */ +template +class UnlabeledValueArg : public ValueArg +{ + + // If compiler has two stage name lookup (as gcc >= 3.4 does) + // this is requried to prevent undef. symbols + using ValueArg::_ignoreable; + using ValueArg::_hasBlanks; + using ValueArg::_extractValue; + using ValueArg::_typeDesc; + using ValueArg::_name; + using ValueArg::_description; + using ValueArg::_alreadySet; + using ValueArg::toString; + + public: + + /** + * UnlabeledValueArg constructor. + * \param name - A one word name for the argument. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. 
+ * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param ignoreable - Allows you to specify that this argument can be + * ignored if the '--' flag is set. This defaults to false (cannot + * be ignored) and should generally stay that way unless you have + * some special need for certain arguments to be ignored. + * \param v - Optional Vistor. You should leave this blank unless + * you have a very good reason. + */ + UnlabeledValueArg( const std::string& name, + const std::string& desc, + bool req, + T value, + const std::string& typeDesc, + bool ignoreable = false, + Visitor* v = NULL); + + /** + * UnlabeledValueArg constructor. + * \param name - A one word name for the argument. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param parser - A CmdLine parser object to add this Arg to + * \param ignoreable - Allows you to specify that this argument can be + * ignored if the '--' flag is set. This defaults to false (cannot + * be ignored) and should generally stay that way unless you have + * some special need for certain arguments to be ignored. + * \param v - Optional Vistor. You should leave this blank unless + * you have a very good reason. 
+ */ + UnlabeledValueArg( const std::string& name, + const std::string& desc, + bool req, + T value, + const std::string& typeDesc, + CmdLineInterface& parser, + bool ignoreable = false, + Visitor* v = NULL ); + + /** + * UnlabeledValueArg constructor. + * \param name - A one word name for the argument. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param ignoreable - Allows you to specify that this argument can be + * ignored if the '--' flag is set. This defaults to false (cannot + * be ignored) and should generally stay that way unless you have + * some special need for certain arguments to be ignored. + * \param v - Optional Vistor. You should leave this blank unless + * you have a very good reason. + */ + UnlabeledValueArg( const std::string& name, + const std::string& desc, + bool req, + T value, + Constraint* constraint, + bool ignoreable = false, + Visitor* v = NULL ); + + + /** + * UnlabeledValueArg constructor. + * \param name - A one word name for the argument. Note that this is used for + * identification, not as a long flag. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param parser - A CmdLine parser object to add this Arg to + * \param ignoreable - Allows you to specify that this argument can be + * ignored if the '--' flag is set. 
This defaults to false (cannot + * be ignored) and should generally stay that way unless you have + * some special need for certain arguments to be ignored. + * \param v - Optional Vistor. You should leave this blank unless + * you have a very good reason. + */ + UnlabeledValueArg( const std::string& name, + const std::string& desc, + bool req, + T value, + Constraint* constraint, + CmdLineInterface& parser, + bool ignoreable = false, + Visitor* v = NULL); + + /** + * Handles the processing of the argument. + * This re-implements the Arg version of this method to set the + * _value of the argument appropriately. Handling specific to + * unlabled arguments. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Overrides shortID for specific behavior. + */ + virtual std::string shortID(const std::string& val="val") const; + + /** + * Overrides longID for specific behavior. + */ + virtual std::string longID(const std::string& val="val") const; + + /** + * Overrides operator== for specific behavior. + */ + virtual bool operator==(const Arg& a ) const; + + /** + * Instead of pushing to the front of list, push to the back. + * \param argList - The list to add this to. + */ + virtual void addToList( std::list& argList ) const; + +}; + +/** + * Constructor implemenation. 
+ */ +template +UnlabeledValueArg::UnlabeledValueArg(const std::string& name, + const std::string& desc, + bool req, + T val, + const std::string& typeDesc, + bool ignoreable, + Visitor* v) +: ValueArg("", name, desc, req, val, typeDesc, v) +{ + _ignoreable = ignoreable; + + OptionalUnlabeledTracker::check(req, toString()); + +} + +template +UnlabeledValueArg::UnlabeledValueArg(const std::string& name, + const std::string& desc, + bool req, + T val, + const std::string& typeDesc, + CmdLineInterface& parser, + bool ignoreable, + Visitor* v) +: ValueArg("", name, desc, req, val, typeDesc, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(req, toString()); + parser.add( this ); +} + +/** + * Constructor implemenation. + */ +template +UnlabeledValueArg::UnlabeledValueArg(const std::string& name, + const std::string& desc, + bool req, + T val, + Constraint* constraint, + bool ignoreable, + Visitor* v) +: ValueArg("", name, desc, req, val, constraint, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(req, toString()); +} + +template +UnlabeledValueArg::UnlabeledValueArg(const std::string& name, + const std::string& desc, + bool req, + T val, + Constraint* constraint, + CmdLineInterface& parser, + bool ignoreable, + Visitor* v) +: ValueArg("", name, desc, req, val, constraint, v) +{ + _ignoreable = ignoreable; + OptionalUnlabeledTracker::check(req, toString()); + parser.add( this ); +} + +/** + * Implementation of processArg(). + */ +template +bool UnlabeledValueArg::processArg(int *i, std::vector& args) +{ + + if ( _alreadySet ) + return false; + + if ( _hasBlanks( args[*i] ) ) + return false; + + // never ignore an unlabeled arg + + _extractValue( args[*i] ); + _alreadySet = true; + return true; +} + +/** + * Overriding shortID for specific output. 
+ */ +template +std::string UnlabeledValueArg::shortID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return std::string("<") + _typeDesc + ">"; +} + +/** + * Overriding longID for specific output. + */ +template +std::string UnlabeledValueArg::longID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + + // Ideally we would like to be able to use RTTI to return the name + // of the type required for this argument. However, g++ at least, + // doesn't appear to return terribly useful "names" of the types. + return std::string("<") + _typeDesc + ">"; +} + +/** + * Overriding operator== for specific behavior. + */ +template +bool UnlabeledValueArg::operator==(const Arg& a ) const +{ + if ( _name == a.getName() || _description == a.getDescription() ) + return true; + else + return false; +} + +template +void UnlabeledValueArg::addToList( std::list& argList ) const +{ + argList.push_back( const_cast(static_cast(this)) ); +} + +} +#endif diff --git a/deps/tclap-1.2.1/tclap/ValueArg.h b/deps/tclap-1.2.1/tclap/ValueArg.h new file mode 100644 index 0000000..7ac2952 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/ValueArg.h @@ -0,0 +1,425 @@ +/****************************************************************************** + * + * file: ValueArg.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_VALUE_ARGUMENT_H +#define TCLAP_VALUE_ARGUMENT_H + +#include +#include + +#include +#include + +namespace TCLAP { + +/** + * The basic labeled argument that parses a value. + * This is a template class, which means the type T defines the type + * that a given object will attempt to parse when the flag/name is matched + * on the command line. While there is nothing stopping you from creating + * an unflagged ValueArg, it is unwise and would cause significant problems. + * Instead use an UnlabeledValueArg. + */ +template +class ValueArg : public Arg +{ + protected: + + /** + * The value parsed from the command line. + * Can be of any type, as long as the >> operator for the type + * is defined. + */ + T _value; + + /** + * Used to support the reset() method so that ValueArg can be + * reset to their constructed value. + */ + T _default; + + /** + * A human readable description of the type to be parsed. + * This is a hack, plain and simple. Ideally we would use RTTI to + * return the name of type T, but until there is some sort of + * consistent support for human readable names, we are left to our + * own devices. + */ + std::string _typeDesc; + + /** + * A Constraint this Arg must conform to. + */ + Constraint* _constraint; + + /** + * Extracts the value from the string. + * Attempts to parse string as type T, if this fails an exception + * is thrown. + * \param val - value to be parsed. + */ + void _extractValue( const std::string& val ); + + public: + + /** + * Labeled ValueArg constructor. 
+ * You could conceivably call this constructor with a blank flag, + * but that would make you a bad person. It would also cause + * an exception to be thrown. If you want an unlabeled argument, + * use the other constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + ValueArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T value, + const std::string& typeDesc, + Visitor* v = NULL); + + + /** + * Labeled ValueArg constructor. + * You could conceivably call this constructor with a blank flag, + * but that would make you a bad person. It would also cause + * an exception to be thrown. If you want an unlabeled argument, + * use the other constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. 
+ * \param typeDesc - A short, human readable description of the + * type that this object expects. This is used in the generation + * of the USAGE statement. The goal is to be helpful to the end user + * of the program. + * \param parser - A CmdLine parser object to add this Arg to + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + ValueArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T value, + const std::string& typeDesc, + CmdLineInterface& parser, + Visitor* v = NULL ); + + /** + * Labeled ValueArg constructor. + * You could conceivably call this constructor with a blank flag, + * but that would make you a bad person. It would also cause + * an exception to be thrown. If you want an unlabeled argument, + * use the other constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param parser - A CmdLine parser object to add this Arg to. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + ValueArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T value, + Constraint* constraint, + CmdLineInterface& parser, + Visitor* v = NULL ); + + /** + * Labeled ValueArg constructor. + * You could conceivably call this constructor with a blank flag, + * but that would make you a bad person. It would also cause + * an exception to be thrown. 
If you want an unlabeled argument, + * use the other constructor. + * \param flag - The one character flag that identifies this + * argument on the command line. + * \param name - A one word name for the argument. Can be + * used as a long flag on the command line. + * \param desc - A description of what the argument is for or + * does. + * \param req - Whether the argument is required on the command + * line. + * \param value - The default value assigned to this argument if it + * is not present on the command line. + * \param constraint - A pointer to a Constraint object used + * to constrain this Arg. + * \param v - An optional visitor. You probably should not + * use this unless you have a very good reason. + */ + ValueArg( const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T value, + Constraint* constraint, + Visitor* v = NULL ); + + /** + * Handles the processing of the argument. + * This re-implements the Arg version of this method to set the + * _value of the argument appropriately. It knows the difference + * between labeled and unlabeled. + * \param i - Pointer the the current argument in the list. + * \param args - Mutable list of strings. Passed + * in from main(). + */ + virtual bool processArg(int* i, std::vector& args); + + /** + * Returns the value of the argument. + */ + T& getValue() ; + + /** + * Specialization of shortID. + * \param val - value to be used. + */ + virtual std::string shortID(const std::string& val = "val") const; + + /** + * Specialization of longID. + * \param val - value to be used. + */ + virtual std::string longID(const std::string& val = "val") const; + + virtual void reset() ; + +private: + /** + * Prevent accidental copying + */ + ValueArg(const ValueArg& rhs); + ValueArg& operator=(const ValueArg& rhs); +}; + + +/** + * Constructor implementation. 
+ */ +template +ValueArg::ValueArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T val, + const std::string& typeDesc, + Visitor* v) +: Arg(flag, name, desc, req, true, v), + _value( val ), + _default( val ), + _typeDesc( typeDesc ), + _constraint( NULL ) +{ } + +template +ValueArg::ValueArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T val, + const std::string& typeDesc, + CmdLineInterface& parser, + Visitor* v) +: Arg(flag, name, desc, req, true, v), + _value( val ), + _default( val ), + _typeDesc( typeDesc ), + _constraint( NULL ) +{ + parser.add( this ); +} + +template +ValueArg::ValueArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T val, + Constraint* constraint, + Visitor* v) +: Arg(flag, name, desc, req, true, v), + _value( val ), + _default( val ), + _typeDesc( constraint->shortID() ), + _constraint( constraint ) +{ } + +template +ValueArg::ValueArg(const std::string& flag, + const std::string& name, + const std::string& desc, + bool req, + T val, + Constraint* constraint, + CmdLineInterface& parser, + Visitor* v) +: Arg(flag, name, desc, req, true, v), + _value( val ), + _default( val ), + _typeDesc( constraint->shortID() ), + _constraint( constraint ) +{ + parser.add( this ); +} + + +/** + * Implementation of getValue(). + */ +template +T& ValueArg::getValue() { return _value; } + +/** + * Implementation of processArg(). 
+ */ +template +bool ValueArg::processArg(int *i, std::vector& args) +{ + if ( _ignoreable && Arg::ignoreRest() ) + return false; + + if ( _hasBlanks( args[*i] ) ) + return false; + + std::string flag = args[*i]; + + std::string value = ""; + trimFlag( flag, value ); + + if ( argMatches( flag ) ) + { + if ( _alreadySet ) + { + if ( _xorSet ) + throw( CmdLineParseException( + "Mutually exclusive argument already set!", + toString()) ); + else + throw( CmdLineParseException("Argument already set!", + toString()) ); + } + + if ( Arg::delimiter() != ' ' && value == "" ) + throw( ArgParseException( + "Couldn't find delimiter for this argument!", + toString() ) ); + + if ( value == "" ) + { + (*i)++; + if ( static_cast(*i) < args.size() ) + _extractValue( args[*i] ); + else + throw( ArgParseException("Missing a value for this argument!", + toString() ) ); + } + else + _extractValue( value ); + + _alreadySet = true; + _checkWithVisitor(); + return true; + } + else + return false; +} + +/** + * Implementation of shortID. + */ +template +std::string ValueArg::shortID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return Arg::shortID( _typeDesc ); +} + +/** + * Implementation of longID. + */ +template +std::string ValueArg::longID(const std::string& val) const +{ + static_cast(val); // Ignore input, don't warn + return Arg::longID( _typeDesc ); +} + +template +void ValueArg::_extractValue( const std::string& val ) +{ + try { + ExtractValue(_value, val, typename ArgTraits::ValueCategory()); + } catch( ArgParseException &e) { + throw ArgParseException(e.error(), toString()); + } + + if ( _constraint != NULL ) + if ( ! 
_constraint->check( _value ) ) + throw( CmdLineParseException( "Value '" + val + + + "' does not meet constraint: " + + _constraint->description(), + toString() ) ); +} + +template +void ValueArg::reset() +{ + Arg::reset(); + _value = _default; +} + +} // namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/ValuesConstraint.h b/deps/tclap-1.2.1/tclap/ValuesConstraint.h new file mode 100644 index 0000000..cb41f64 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/ValuesConstraint.h @@ -0,0 +1,148 @@ + + +/****************************************************************************** + * + * file: ValuesConstraint.h + * + * Copyright (c) 2005, Michael E. Smoot + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_VALUESCONSTRAINT_H +#define TCLAP_VALUESCONSTRAINT_H + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include +#else +#define HAVE_SSTREAM +#endif + +#if defined(HAVE_SSTREAM) +#include +#elif defined(HAVE_STRSTREAM) +#include +#else +#error "Need a stringstream (sstream or strstream) to compile!" +#endif + +namespace TCLAP { + +/** + * A Constraint that constrains the Arg to only those values specified + * in the constraint. + */ +template +class ValuesConstraint : public Constraint +{ + + public: + + /** + * Constructor. + * \param allowed - vector of allowed values. 
+ */ + ValuesConstraint(std::vector& allowed); + + /** + * Virtual destructor. + */ + virtual ~ValuesConstraint() {} + + /** + * Returns a description of the Constraint. + */ + virtual std::string description() const; + + /** + * Returns the short ID for the Constraint. + */ + virtual std::string shortID() const; + + /** + * The method used to verify that the value parsed from the command + * line meets the constraint. + * \param value - The value that will be checked. + */ + virtual bool check(const T& value) const; + + protected: + + /** + * The list of valid values. + */ + std::vector _allowed; + + /** + * The string used to describe the allowed values of this constraint. + */ + std::string _typeDesc; + +}; + +template +ValuesConstraint::ValuesConstraint(std::vector& allowed) +: _allowed(allowed), + _typeDesc("") +{ + for ( unsigned int i = 0; i < _allowed.size(); i++ ) + { + +#if defined(HAVE_SSTREAM) + std::ostringstream os; +#elif defined(HAVE_STRSTREAM) + std::ostrstream os; +#else +#error "Need a stringstream (sstream or strstream) to compile!" 
+#endif + + os << _allowed[i]; + + std::string temp( os.str() ); + + if ( i > 0 ) + _typeDesc += "|"; + _typeDesc += temp; + } +} + +template +bool ValuesConstraint::check( const T& val ) const +{ + if ( std::find(_allowed.begin(),_allowed.end(),val) == _allowed.end() ) + return false; + else + return true; +} + +template +std::string ValuesConstraint::shortID() const +{ + return _typeDesc; +} + +template +std::string ValuesConstraint::description() const +{ + return _typeDesc; +} + + +} //namespace TCLAP +#endif + diff --git a/deps/tclap-1.2.1/tclap/VersionVisitor.h b/deps/tclap-1.2.1/tclap/VersionVisitor.h new file mode 100644 index 0000000..c110d4f --- /dev/null +++ b/deps/tclap-1.2.1/tclap/VersionVisitor.h @@ -0,0 +1,81 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: VersionVisitor.h + * + * Copyright (c) 2003, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_VERSION_VISITOR_H +#define TCLAP_VERSION_VISITOR_H + +#include +#include +#include + +namespace TCLAP { + +/** + * A Vistor that will call the version method of the given CmdLineOutput + * for the specified CmdLine object and then exit. 
+ */ +class VersionVisitor: public Visitor +{ + private: + /** + * Prevent accidental copying + */ + VersionVisitor(const VersionVisitor& rhs); + VersionVisitor& operator=(const VersionVisitor& rhs); + + protected: + + /** + * The CmdLine of interest. + */ + CmdLineInterface* _cmd; + + /** + * The output object. + */ + CmdLineOutput** _out; + + public: + + /** + * Constructor. + * \param cmd - The CmdLine the output is generated for. + * \param out - The type of output. + */ + VersionVisitor( CmdLineInterface* cmd, CmdLineOutput** out ) + : Visitor(), _cmd( cmd ), _out( out ) { } + + /** + * Calls the version method of the output object using the + * specified CmdLine. + */ + void visit() { + (*_out)->version(*_cmd); + throw ExitException(0); + } + +}; + +} + +#endif diff --git a/deps/tclap-1.2.1/tclap/Visitor.h b/deps/tclap-1.2.1/tclap/Visitor.h new file mode 100644 index 0000000..38ddcbd --- /dev/null +++ b/deps/tclap-1.2.1/tclap/Visitor.h @@ -0,0 +1,53 @@ + +/****************************************************************************** + * + * file: Visitor.h + * + * Copyright (c) 2003, Michael E. Smoot . + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + + +#ifndef TCLAP_VISITOR_H +#define TCLAP_VISITOR_H + +namespace TCLAP { + +/** + * A base class that defines the interface for visitors. 
+ */ +class Visitor +{ + public: + + /** + * Constructor. Does nothing. + */ + Visitor() { } + + /** + * Destructor. Does nothing. + */ + virtual ~Visitor() { } + + /** + * Does nothing. Should be overridden by child. + */ + virtual void visit() { } +}; + +} + +#endif diff --git a/deps/tclap-1.2.1/tclap/XorHandler.h b/deps/tclap-1.2.1/tclap/XorHandler.h new file mode 100644 index 0000000..d9dfad3 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/XorHandler.h @@ -0,0 +1,166 @@ + +/****************************************************************************** + * + * file: XorHandler.h + * + * Copyright (c) 2003, Michael E. Smoot . + * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno. + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. + * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_XORHANDLER_H +#define TCLAP_XORHANDLER_H + +#include +#include +#include +#include +#include + +namespace TCLAP { + +/** + * This class handles lists of Arg's that are to be XOR'd on the command + * line. This is used by CmdLine and you shouldn't ever use it. + */ +class XorHandler +{ + protected: + + /** + * The list of lists of Arg's to be xor'd together. + */ + std::vector< std::vector > _orList; + + public: + + /** + * Constructor. Does nothing. + */ + XorHandler( ) : _orList(std::vector< std::vector >()) {} + + /** + * Add a list of Arg*'s that will be xor'd together. 
+ * \param ors - list of Arg* that will be xor'd. + */ + void add( std::vector& ors ); + + /** + * Checks whether the specified Arg is in one of the xor lists and + * if it does match one, returns the size of the xor list that the + * Arg matched. If the Arg matches, then it also sets the rest of + * the Arg's in the list. You shouldn't use this. + * \param a - The Arg to be checked. + */ + int check( const Arg* a ); + + /** + * Returns the XOR specific short usage. + */ + std::string shortUsage(); + + /** + * Prints the XOR specific long usage. + * \param os - Stream to print to. + */ + void printLongUsage(std::ostream& os); + + /** + * Simply checks whether the Arg is contained in one of the arg + * lists. + * \param a - The Arg to be checked. + */ + bool contains( const Arg* a ); + + std::vector< std::vector >& getXorList(); + +}; + + +////////////////////////////////////////////////////////////////////// +//BEGIN XOR.cpp +////////////////////////////////////////////////////////////////////// +inline void XorHandler::add( std::vector& ors ) +{ + _orList.push_back( ors ); +} + +inline int XorHandler::check( const Arg* a ) +{ + // iterate over each XOR list + for ( int i = 0; static_cast(i) < _orList.size(); i++ ) + { + // if the XOR list contains the arg.. 
+ ArgVectorIterator ait = std::find( _orList[i].begin(), + _orList[i].end(), a ); + if ( ait != _orList[i].end() ) + { + // first check to see if a mutually exclusive switch + // has not already been set + for ( ArgVectorIterator it = _orList[i].begin(); + it != _orList[i].end(); + it++ ) + if ( a != (*it) && (*it)->isSet() ) + throw(CmdLineParseException( + "Mutually exclusive argument already set!", + (*it)->toString())); + + // go through and set each arg that is not a + for ( ArgVectorIterator it = _orList[i].begin(); + it != _orList[i].end(); + it++ ) + if ( a != (*it) ) + (*it)->xorSet(); + + // return the number of required args that have now been set + if ( (*ait)->allowMore() ) + return 0; + else + return static_cast(_orList[i].size()); + } + } + + if ( a->isRequired() ) + return 1; + else + return 0; +} + +inline bool XorHandler::contains( const Arg* a ) +{ + for ( int i = 0; static_cast(i) < _orList.size(); i++ ) + for ( ArgVectorIterator it = _orList[i].begin(); + it != _orList[i].end(); + it++ ) + if ( a == (*it) ) + return true; + + return false; +} + +inline std::vector< std::vector >& XorHandler::getXorList() +{ + return _orList; +} + + + +////////////////////////////////////////////////////////////////////// +//END XOR.cpp +////////////////////////////////////////////////////////////////////// + +} //namespace TCLAP + +#endif diff --git a/deps/tclap-1.2.1/tclap/ZshCompletionOutput.h b/deps/tclap-1.2.1/tclap/ZshCompletionOutput.h new file mode 100644 index 0000000..0b37fc7 --- /dev/null +++ b/deps/tclap-1.2.1/tclap/ZshCompletionOutput.h @@ -0,0 +1,323 @@ +// -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- + +/****************************************************************************** + * + * file: ZshCompletionOutput.h + * + * Copyright (c) 2006, Oliver Kiddle + * All rights reverved. + * + * See the file COPYING in the top directory of this distribution for + * more information. 
+ * + * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef TCLAP_ZSHCOMPLETIONOUTPUT_H +#define TCLAP_ZSHCOMPLETIONOUTPUT_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace TCLAP { + +/** + * A class that generates a Zsh completion function as output from the usage() + * method for the given CmdLine and its Args. + */ +class ZshCompletionOutput : public CmdLineOutput +{ + + public: + + ZshCompletionOutput(); + + /** + * Prints the usage to stdout. Can be overridden to + * produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + */ + virtual void usage(CmdLineInterface& c); + + /** + * Prints the version to stdout. Can be overridden + * to produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + */ + virtual void version(CmdLineInterface& c); + + /** + * Prints (to stderr) an error message, short usage + * Can be overridden to produce alternative behavior. + * \param c - The CmdLine object the output is generated for. + * \param e - The ArgException that caused the failure. 
+ */ + virtual void failure(CmdLineInterface& c, + ArgException& e ); + + protected: + + void basename( std::string& s ); + void quoteSpecialChars( std::string& s ); + + std::string getMutexList( CmdLineInterface& _cmd, Arg* a ); + void printOption( Arg* it, std::string mutex ); + void printArg( Arg* it ); + + std::map common; + char theDelimiter; +}; + +inline ZshCompletionOutput::ZshCompletionOutput() /* inline: defined in a header, avoids multiple-definition (ODR) link errors */ +: common(std::map()), + theDelimiter('=') +{ + common["host"] = "_hosts"; + common["hostname"] = "_hosts"; + common["file"] = "_files"; + common["filename"] = "_files"; + common["user"] = "_users"; + common["username"] = "_users"; + common["directory"] = "_directories"; + common["path"] = "_directories"; + common["url"] = "_urls"; +} + +inline void ZshCompletionOutput::version(CmdLineInterface& _cmd) +{ + std::cout << _cmd.getVersion() << std::endl; +} + +inline void ZshCompletionOutput::usage(CmdLineInterface& _cmd ) +{ + std::list argList = _cmd.getArgList(); + std::string progName = _cmd.getProgramName(); + std::string xversion = _cmd.getVersion(); + theDelimiter = _cmd.getDelimiter(); + basename(progName); + + std::cout << "#compdef " << progName << std::endl << std::endl << + "# " << progName << " version " << xversion << std::endl << std::endl << + "_arguments -s -S"; + + for (ArgListIterator it = argList.begin(); it != argList.end(); it++) + { + if ( (*it)->shortID().at(0) == '<' ) + printArg((*it)); + else if ( (*it)->getFlag() != "-" ) + printOption((*it), getMutexList(_cmd, *it)); + } + + std::cout << std::endl; +} + +inline void ZshCompletionOutput::failure( CmdLineInterface& _cmd, + ArgException& e ) +{ + static_cast(_cmd); // unused + std::cout << e.what() << std::endl; +} + +inline void ZshCompletionOutput::quoteSpecialChars( std::string& s ) +{ + size_t idx = s.find_last_of(':'); + while ( idx != std::string::npos ) + { + s.insert(idx, 1, '\\'); + idx = s.find_last_of(':', idx); + } + idx = s.find_last_of('\''); + while ( idx != std::string::npos ) 
+ { + s.insert(idx, "'\\'"); + if (idx == 0) + idx = std::string::npos; + else + idx = s.find_last_of('\'', --idx); + } +} + +inline void ZshCompletionOutput::basename( std::string& s ) +{ + size_t p = s.find_last_of('/'); + if ( p != std::string::npos ) + { + s.erase(0, p + 1); + } +} + +inline void ZshCompletionOutput::printArg(Arg* a) +{ + static int count = 1; + + std::cout << " \\" << std::endl << " '"; + if ( a->acceptsMultipleValues() ) + std::cout << '*'; + else + std::cout << count++; + std::cout << ':'; + if ( !a->isRequired() ) + std::cout << ':'; + + std::cout << a->getName() << ':'; + std::map::iterator compArg = common.find(a->getName()); + if ( compArg != common.end() ) + { + std::cout << compArg->second; + } + else + { + std::cout << "_guard \"^-*\" " << a->getName(); + } + std::cout << '\''; +} + +inline void ZshCompletionOutput::printOption(Arg* a, std::string mutex) +{ + std::string flag = a->flagStartChar() + a->getFlag(); + std::string name = a->nameStartString() + a->getName(); + std::string desc = a->getDescription(); + + // remove full stop and capitalisation from description as + // this is the convention for zsh function + if (!desc.compare(0, 12, "(required) ")) + { + desc.erase(0, 12); + } + if (!desc.compare(0, 15, "(OR required) ")) + { + desc.erase(0, 15); + } + size_t len = desc.length(); + if (len && desc.at(--len) == '.') + { + desc.erase(len); + } + if (len) + { + desc.replace(0, 1, 1, tolower(desc.at(0))); + } + + std::cout << " \\" << std::endl << " '" << mutex; + + if ( a->getFlag().empty() ) + { + std::cout << name; + } + else + { + std::cout << "'{" << flag << ',' << name << "}'"; + } + if ( theDelimiter == '=' && a->isValueRequired() ) + std::cout << "=-"; + quoteSpecialChars(desc); + std::cout << '[' << desc << ']'; + + if ( a->isValueRequired() ) + { + std::string arg = a->shortID(); + arg.erase(0, arg.find_last_of(theDelimiter) + 1); + if ( arg.at(arg.length()-1) == ']' ) + arg.erase(arg.length()-1); + if ( 
arg.at(arg.length()-1) == ']' ) + { + arg.erase(arg.length()-1); + } + if ( arg.at(0) == '<' ) + { + arg.erase(arg.length()-1); + arg.erase(0, 1); + } + size_t p = arg.find('|'); + if ( p != std::string::npos ) + { + do + { + arg.replace(p, 1, 1, ' '); + } + while ( (p = arg.find_first_of('|', p)) != std::string::npos ); + quoteSpecialChars(arg); + std::cout << ": :(" << arg << ')'; + } + else + { + std::cout << ':' << arg; + std::map::iterator compArg = common.find(arg); + if ( compArg != common.end() ) + { + std::cout << ':' << compArg->second; + } + } + } + + std::cout << '\''; +} + +inline std::string ZshCompletionOutput::getMutexList( CmdLineInterface& _cmd, Arg* a) +{ + XorHandler xorHandler = _cmd.getXorHandler(); + std::vector< std::vector > xorList = xorHandler.getXorList(); + + if (a->getName() == "help" || a->getName() == "version") + { + return "(-)"; + } + + std::ostringstream list; + if ( a->acceptsMultipleValues() ) + { + list << '*'; + } + + for ( int i = 0; static_cast(i) < xorList.size(); i++ ) + { + for ( ArgVectorIterator it = xorList[i].begin(); + it != xorList[i].end(); + it++) + if ( a == (*it) ) + { + list << '('; + for ( ArgVectorIterator iu = xorList[i].begin(); + iu != xorList[i].end(); + iu++ ) + { + bool notCur = (*iu) != a; + bool hasFlag = !(*iu)->getFlag().empty(); + if ( iu != xorList[i].begin() && (notCur || hasFlag) ) + list << ' '; + if (hasFlag) + list << (*iu)->flagStartChar() << (*iu)->getFlag() << ' '; + if ( notCur || hasFlag ) + list << (*iu)->nameStartString() << (*iu)->getName(); + } + list << ')'; + return list.str(); + } + } + + // wasn't found in xor list + if (!a->getFlag().empty()) { + list << "(" << a->flagStartChar() << a->getFlag() << ' ' << + a->nameStartString() << a->getName() << ')'; + } + + return list.str(); +} + +} //namespace TCLAP +#endif diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 6d55e47..9213404 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -1,11 +1,3 @@ -install( - FILES - 
opencc.1 - opencc_dict.1 - DESTINATION - ${DIR_SHARE}/man/man1 -) - if(BUILD_DOCUMENTATION) find_package(Doxygen) if (NOT DOXYGEN_FOUND) @@ -37,7 +29,7 @@ if(BUILD_DOCUMENTATION) DIRECTORY ${CMAKE_BINARY_DIR}/doc/html DESTINATION - ${DIR_SHARE_OPENCC}/doc + ${DIR_SHARE_OPENCC}doc ) set_directory_properties( diff --git a/doc/opencc.1 b/doc/opencc.1 deleted file mode 100644 index 21c217b..0000000 --- a/doc/opencc.1 +++ /dev/null @@ -1,27 +0,0 @@ -.TH OPENCC "1" "June 2010" "opencc " "User Commands" -.SH NAME -opencc \- simplified-traditional chinese conversion tool -.SH DESCRIPTION -Open Chinese Convert (OpenCC) Command Line Tool -.SS "Usage:" -.HP -opencc [\-i input_file] [\-o output_file] [\-c config_file] -.HP -\fB\-i\fR -Read original text from input_file. -.HP -\fB\-o\fR -Write converted text to output_file. -.HP -\fB\-c\fR -Load dictionary configuration from config_file. -.IP -Note: -.IP -Text from standard input will be read if input_file is not set and will be written to standard output if output_file is not set. -.IP -Default configuration(zhs2zht.ini) will be load if config_file is not set. -.PP -Open Chinese Convert (OpenCC) Command Line Tool -.SH "SEE ALSO" -.BR iconv (1) diff --git a/doc/opencc.doxy.in b/doc/opencc.doxy.in index fc47999..a93862d 100644 --- a/doc/opencc.doxy.in +++ b/doc/opencc.doxy.in @@ -119,7 +119,7 @@ INLINE_INHERITED_MEMB = NO # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. -FULL_PATH_NAMES = YES +FULL_PATH_NAMES = NO # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is @@ -209,7 +209,7 @@ TCL_SUBST = # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. 
-OPTIMIZE_OUTPUT_FOR_C = YES +OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for @@ -329,22 +329,6 @@ INLINE_SIMPLE_STRUCTS = NO TYPEDEF_HIDES_STRUCT = NO -# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to -# determine which symbols to keep in memory and which to flush to disk. -# When the cache is full, less often used symbols will be written to disk. -# For small to medium size projects (<1000 input files) the default value is -# probably good enough. For larger projects a too small cache size can cause -# doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penalty. -# If the system has enough physical memory increasing the cache will improve the -# performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will roughly double the -# memory usage. The cache size is given by this formula: -# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols. - -SYMBOL_CACHE_SIZE = 0 - # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the @@ -386,7 +370,7 @@ EXTRACT_STATIC = NO # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. -EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_CLASSES = NO # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in @@ -569,7 +553,7 @@ SHOW_USED_FILES = YES # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). 
The default is YES. -SHOW_FILES = YES +SHOW_FILES = NO # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. @@ -615,7 +599,7 @@ CITE_BIB_FILES = # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. -QUIET = NO +QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank @@ -627,7 +611,7 @@ WARNINGS = YES # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. -WARN_IF_UNDOCUMENTED = YES +WARN_IF_UNDOCUMENTED = NO # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some @@ -668,7 +652,7 @@ WARN_LOGFILE = # directories like "/usr/src/myproject". Separate the files or directories # with spaces. -INPUT = @CMAKE_SOURCE_DIR@/src @CMAKE_SOURCE_DIR@/node @CMAKE_SOURCE_DIR@/data @CMAKE_SOURCE_DIR@/README.md +INPUT = @CMAKE_SOURCE_DIR@/src @CMAKE_SOURCE_DIR@/data @CMAKE_SOURCE_DIR@/README.md @CMAKE_SOURCE_DIR@/node/demo.js # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is @@ -686,7 +670,7 @@ INPUT_ENCODING = UTF-8 # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl -FILE_PATTERNS = *.c *.cc *.h *.py *.js +FILE_PATTERNS = *.c *.cpp *.h *.hpp # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. @@ -802,7 +786,7 @@ USE_MDFILE_AS_MAINPAGE = README.md # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. 
-SOURCE_BROWSER = YES +SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. @@ -1471,18 +1455,6 @@ GENERATE_XML = NO XML_OUTPUT = xml -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that diff --git a/doc/opencc_dict.1 b/doc/opencc_dict.1 deleted file mode 100644 index 02f6626..0000000 --- a/doc/opencc_dict.1 +++ /dev/null @@ -1,18 +0,0 @@ -.TH OPENCC_DICT "1" "June 2010" "opencc_dict " "User Commands" -.SH NAME -opencc_dict \- open chinese convert dictionary tool -.SH DESCRIPTION -Open Chinese Convert (OpenCC) Dictionary Tool -.SS "Usage:" -.HP -opencc_dict \fB\-i\fR input_file \fB\-o\fR output_file -.HP -\fB\-i\fR -Read data from input_file. -.HP -\fB\-o\fR -Write converted data to output_file. 
-.PP -Open Chinese Convert (OpenCC) Dictionary Tool -.SH "SEE ALSO" -.BR opencc (1) diff --git a/gypi/configs.gypi b/gypi/configs.gypi deleted file mode 100644 index d7d82d4..0000000 --- a/gypi/configs.gypi +++ /dev/null @@ -1,25 +0,0 @@ -{ - "targets": [{ - "target_name": "configs", - "type": "none", - "copies": [{ - "destination": "<(PRODUCT_DIR)", - "files": [ - "../data/config/mix2zhs.ini", - "../data/config/mix2zht.ini", - "../data/config/zhs2zht.ini", - "../data/config/zhs2zhtw_p.ini", - "../data/config/zhs2zhtw_v.ini", - "../data/config/zhs2zhtw_vp.ini", - "../data/config/zht2zhs.ini", - "../data/config/zht2zhtw_p.ini", - "../data/config/zht2zhtw_v.ini", - "../data/config/zht2zhtw_vp.ini", - "../data/config/zhtw2zhcn_s.ini", - "../data/config/zhtw2zhcn_t.ini", - "../data/config/zhtw2zhs.ini", - "../data/config/zhtw2zht.ini" - ] - }] - }] -} diff --git a/gypi/dicts.gypi b/gypi/dicts.gypi deleted file mode 100644 index b30b7b2..0000000 --- a/gypi/dicts.gypi +++ /dev/null @@ -1,60 +0,0 @@ -{ - "includes": [ - "opencc_dict.gypi", - ], - "targets": [{ - "target_name": "dicts", - "type": "none", - "variables": { - "cmd": "<(PRODUCT_DIR)/opencc_dict", - "input_prefix": "data/", - "output_prefix": "<(PRODUCT_DIR)/" - }, - "copies": [{ - "destination": "<(PRODUCT_DIR)", - "files": [ - "../data/tw/to_tw_variants.txt", - "../data/tw/to_tw_phrases.txt", - "../data/tw/from_tw_variants.txt", - "../data/tw/from_tw_phrases.txt", - "../data/cn/to_cn_phrases.txt" - ] - }], - "actions": [{ - "action_name": "simp_to_trad_characters", - "variables": { - "input": "<(input_prefix)simp_to_trad/characters.txt", - }, - "inputs": ["<(cmd)", "<(input)"], - "outputs": ["<(output_prefix)simp_to_trad_characters.ocd"], - "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)"] - }, { - "action_name": "simp_to_trad_phrases", - "variables": { - "input": "<(input_prefix)simp_to_trad/phrases.txt", - }, - "inputs": ["<(cmd)", "<(input)"], - "outputs": 
["<(output_prefix)simp_to_trad_phrases.ocd"], - "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)"] - }, { - "action_name": "trad_to_simp_characters", - "variables": { - "input": "<(input_prefix)trad_to_simp/characters.txt", - }, - "inputs": ["<(cmd)", "<(input)"], - "outputs": ["<(output_prefix)trad_to_simp_characters.ocd"], - "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)"] - }, { - "action_name": "trad_to_simp_phrases", - "variables": { - "input": "<(input_prefix)trad_to_simp/phrases.txt", - }, - "inputs": ["<(cmd)", "<(input)"], - "outputs": ["<(output_prefix)trad_to_simp_phrases.ocd"], - "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)"] - }], - "dependencies": [ - "opencc_dict" - ] - }] -} diff --git a/gypi/global.gypi b/gypi/global.gypi deleted file mode 100644 index 984324e..0000000 --- a/gypi/global.gypi +++ /dev/null @@ -1,11 +0,0 @@ -{ - "variables": { - "opencc_version": "0.4.3" - }, - "target_defaults": { - "defines": [ - "VERSION=\"<(opencc_version)\"", - "PKGDATADIR=\"\"" - ] - } -} diff --git a/gypi/opencc_dict.gypi b/gypi/opencc_dict.gypi deleted file mode 100644 index c6dd041..0000000 --- a/gypi/opencc_dict.gypi +++ /dev/null @@ -1,17 +0,0 @@ -{ - "targets": [{ - "target_name": "opencc_dict", - "type": "executable", - "sources": [ - "../src/tools/opencc_dict.c", - "../src/encoding.c", - "../src/utils.c", - "../src/dict_group.c", - "../src/dict_chain.c", - "../src/config_reader.c", - "../src/dict.c", - "../src/dictionary/datrie.c", - "../src/dictionary/text.c" - ] - }] -} diff --git a/node/binding.cc b/node/binding.cc index f459e5a..baa94e4 100644 --- a/node/binding.cc +++ b/node/binding.cc @@ -1,97 +1,110 @@ #include #include #include -#include "../src/opencc.h" + +#include "Config.hpp" +#include "Converter.hpp" using namespace v8; +using namespace opencc; -char* ToUtf8String(const Local& str) { - char* utf8 = new char[str->Utf8Length() + 1]; - utf8[str->Utf8Length()] = '\0'; - str->WriteUtf8(utf8); - return utf8; 
+string ToUtf8String(const Local& str) { + v8::String::Utf8Value utf8(str); + return std::string(*utf8); } class OpenccBinding : public node::ObjectWrap { struct ConvertRequest { - OpenccBinding* opencc_instance; - char* input; - char* output; + OpenccBinding* instance; + string input; + string output; Persistent callback; + Optional ex; + + ConvertRequest() + : instance(nullptr), ex(Optional::Null()) { + } }; + + Config config_; + const ConverterPtr converter_; public: - explicit OpenccBinding(const char * config_file) { - handler_ = opencc_open(config_file); - } + explicit OpenccBinding(const string configFileName) + : config_(), + converter_(config_.NewFromFile(configFileName)) {} virtual ~OpenccBinding() { - if (handler_ != (opencc_t) -1) - opencc_close(handler_); } - operator bool() const { - return handler_ != (opencc_t) -1; + string Convert(const string& input) { + return converter_->Convert(input); } static Handle New(const Arguments& args) { HandleScope scope; - OpenccBinding* opencc_instance; - - if (args.Length() >= 1 && args[0]->IsString()) { - char* config_file = ToUtf8String(args[0]->ToString()); - opencc_instance = new OpenccBinding(config_file); - delete[] config_file; - } else { - const char* config_file = OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD; - opencc_instance = new OpenccBinding(config_file); - } - - if (!*opencc_instance) { - ThrowException(Exception::Error( - String::New("Can not create opencc instance"))); + OpenccBinding* instance; + + try { + if (args.Length() >= 1 && args[0]->IsString()) { + string configFile = ToUtf8String(args[0]->ToString()); + instance = new OpenccBinding(configFile); + } else { + instance = new OpenccBinding("s2t.json"); + } + } catch (opencc::Exception& e) { + ThrowException(v8::Exception::Error( + String::New(e.what()))); return scope.Close(Undefined()); } - opencc_instance->Wrap(args.This()); + + instance->Wrap(args.This()); return args.This(); } static Handle Convert(const Arguments& args) { HandleScope scope; if 
(args.Length() < 2 || !args[0]->IsString() || !args[1]->IsFunction()) { - ThrowException(Exception::TypeError(String::New("Wrong arguments"))); + ThrowException(v8::Exception::TypeError(String::New("Wrong arguments"))); return scope.Close(Undefined()); } ConvertRequest* conv_data = new ConvertRequest; - conv_data->opencc_instance = ObjectWrap::Unwrap(args.This()); + conv_data->instance = ObjectWrap::Unwrap(args.This()); conv_data->input = ToUtf8String(args[0]->ToString()); conv_data->callback = Persistent::New(Local::Cast(args[1])); + conv_data->ex = Optional::Null(); uv_work_t* req = new uv_work_t; req->data = conv_data; - uv_queue_work(uv_default_loop(), req, DoConnect, (uv_after_work_cb)AfterConvert); + uv_queue_work(uv_default_loop(), req, DoConvert, (uv_after_work_cb)AfterConvert); return Undefined(); } - - static void DoConnect(uv_work_t* req) { + + static void DoConvert(uv_work_t* req) { ConvertRequest* conv_data = static_cast(req->data); - opencc_t opencc_handler = conv_data->opencc_instance->handler_; - conv_data->output = opencc_convert_utf8(opencc_handler, conv_data->input, (size_t) -1); + OpenccBinding* instance = conv_data->instance; + try { + conv_data->output = instance->Convert(conv_data->input); + } catch (opencc::Exception& e) { + conv_data->ex = Optional(e); + } } static void AfterConvert(uv_work_t* req) { HandleScope scope; ConvertRequest* conv_data = static_cast(req->data); - Local converted = String::New(conv_data->output); + Local err = Local::New(Undefined()); + Local converted = String::New(conv_data->output.c_str()); + if (!conv_data->ex.IsNull()) { + err = String::New(conv_data->ex.Get().what()); + } const unsigned argc = 2; Local argv[argc] = { - Local::New(Undefined()), + err, Local::New(converted) }; conv_data->callback->Call(Context::GetCurrent()->Global(), argc, argv); conv_data->callback.Dispose(); - delete[] conv_data->input; - opencc_convert_utf8_free(conv_data->output); delete conv_data; delete req; } @@ -99,40 +112,24 @@ class 
OpenccBinding : public node::ObjectWrap { static Handle ConvertSync(const Arguments& args) { HandleScope scope; if (args.Length() < 1 || !args[0]->IsString()) { - ThrowException(Exception::TypeError(String::New("Wrong arguments"))); + ThrowException(v8::Exception::TypeError(String::New("Wrong arguments"))); return scope.Close(Undefined()); } - OpenccBinding* opencc_instance = ObjectWrap::Unwrap(args.This()); - opencc_t opencc_handler = opencc_instance->handler_; - char* input = ToUtf8String(args[0]->ToString()); - char* output = opencc_convert_utf8(opencc_handler, input, (size_t) -1); - - Local converted = String::New(output); - delete[] input; - opencc_convert_utf8_free(output); - return scope.Close(converted); - } - - static Handle SetConversionMode(const Arguments& args) { - HandleScope scope; - if (args.Length() < 1 || !args[0]->IsInt32()) { - ThrowException(Exception::TypeError(String::New("Wrong arguments"))); - return scope.Close(Undefined()); - } + OpenccBinding* instance = ObjectWrap::Unwrap(args.This()); - OpenccBinding* opencc_instance = ObjectWrap::Unwrap(args.This()); - opencc_t opencc_handler = opencc_instance->handler_; - int conversion_mode = args[0]->ToInt32()->Value(); - if (conversion_mode < 0 || conversion_mode > 2) { - ThrowException(Exception::Error( - String::New("conversion_mode must between 0 and 2"))); + string input = ToUtf8String(args[0]->ToString()); + string output; + try { + output = instance->Convert(input); + } catch (opencc::Exception& e) { + ThrowException(v8::Exception::Error( + String::New(e.what()))); return scope.Close(Undefined()); } - opencc_set_conversion_mode(opencc_handler, - (opencc_conversion_mode) conversion_mode); - return scope.Close(Boolean::New(true)); + Local converted = String::New(output.c_str()); + return scope.Close(converted); } static void init(Handle target) { @@ -145,15 +142,11 @@ class OpenccBinding : public node::ObjectWrap { FunctionTemplate::New(Convert)->GetFunction()); 
tpl->PrototypeTemplate()->Set(String::NewSymbol("convertSync"), FunctionTemplate::New(ConvertSync)->GetFunction()); - tpl->PrototypeTemplate()->Set(String::NewSymbol("setConversionMode"), - FunctionTemplate::New(SetConversionMode)->GetFunction()); // Constructor Persistent constructor = Persistent::New( tpl->GetFunction()); target->Set(String::NewSymbol("Opencc"), constructor); } - - opencc_t handler_; }; void init(Handle target) { diff --git a/node/configs.gypi b/node/configs.gypi new file mode 100644 index 0000000..92ac631 --- /dev/null +++ b/node/configs.gypi @@ -0,0 +1,19 @@ +{ + "targets": [{ + "target_name": "configs", + "type": "none", + "copies": [{ + "destination": "<(PRODUCT_DIR)", + "files": [ + "../data/config/s2t.json", + "../data/config/s2tw.json", + "../data/config/s2twp.json", + "../data/config/t2s.json", + "../data/config/tw2s.json", + "../data/config/tw2sp.json", + "../data/config/s2hk.json", + "../data/config/hk2s.json", + ] + }] + }] +} diff --git a/node/demo.js b/node/demo.js index b55c539..eba2da5 100644 --- a/node/demo.js +++ b/node/demo.js @@ -5,7 +5,7 @@ * @license * Open Chinese Convert * - * Copyright 2010-2013 BYVoid + * Copyright 2010-2014 BYVoid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,10 +29,7 @@ var OpenCC = require('./opencc'); // Load the default Simplified to Traditional config -var opencc = new OpenCC('zhs2zht.ini'); - -// Set conversion mode -opencc.setConversionMode(OpenCC.CONVERSION_FAST); +var opencc = new OpenCC('s2t.json'); // Sync API var converted = opencc.convertSync("汉字"); diff --git a/node/dicts.gypi b/node/dicts.gypi new file mode 100644 index 0000000..9cf4d06 --- /dev/null +++ b/node/dicts.gypi @@ -0,0 +1,158 @@ +{ + "targets": [{ + "target_name": "dicts", + "type": "none", + "variables": { + "cmd": "<(PRODUCT_DIR)/opencc_dict", + "dict_merge": "<(module_root_dir)/data/scripts/merge.py", + "dict_reverse": "<(module_root_dir)/data/scripts/reverse.py", + "input_prefix": "<(module_root_dir)/data/dictionary/", + "output_prefix": "<(PRODUCT_DIR)/" + }, + "actions": [{ + "action_name": "STCharacters", + "variables": { + "input": "<(input_prefix)STCharacters.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)STCharacters.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "STPhrases", + "variables": { + "input": "<(input_prefix)STPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)STPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TSCharacters", + "variables": { + "input": "<(input_prefix)TSCharacters.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TSCharacters.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TSPhrases", + "variables": { + "input": "<(input_prefix)TSPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TSPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TWVariants", + "variables": { + "input": 
"<(input_prefix)TWVariants.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWVariants.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TWVariantsRevPhrases", + "variables": { + "input": "<(input_prefix)TWVariantsRevPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWVariantsRevPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "JPVariants", + "variables": { + "input": "<(input_prefix)JPVariants.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)JPVariants.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TWPhrases.txt", + "inputs": ["<(cmd)"], + "outputs": ["<(output_prefix)TWPhrases.txt"], + "action": ["python", "<(dict_merge)", "<(input_prefix)TWPhrasesIT.txt", "<(input_prefix)TWPhrasesName.txt", "<(input_prefix)TWPhrasesOther.txt", "<@(_outputs)"] + }, { + "action_name": "TWVariantsRev.txt", + "variables": { + "input": "<(input_prefix)TWVariants.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWVariantsRev.txt"], + "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"] + }, { + "action_name": "TWPhrasesRev.txt", + "variables": { + "input": "<(output_prefix)TWPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWPhrasesRev.txt"], + "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"] + }, { + "action_name": "TWPhrases", + "variables": { + "input": "<(output_prefix)TWPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TWVariantsRev", + "variables": { + "input": "<(output_prefix)TWVariantsRev.txt", + 
}, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWVariantsRev.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "TWPhrasesRev", + "variables": { + "input": "<(output_prefix)TWPhrasesRev.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)TWPhrasesRev.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "HKVariants", + "variables": { + "input": "<(input_prefix)HKVariants.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)HKVariants.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "HKVariantsPhrases", + "variables": { + "input": "<(input_prefix)HKVariantsPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)HKVariantsPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "HKVariantsRevPhrases", + "variables": { + "input": "<(input_prefix)HKVariantsRevPhrases.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)HKVariantsRevPhrases.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }, { + "action_name": "HKVariantsRev.txt", + "variables": { + "input": "<(input_prefix)HKVariants.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)HKVariantsRev.txt"], + "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"] + }, { + "action_name": "HKVariantsRev", + "variables": { + "input": "<(output_prefix)HKVariantsRev.txt", + }, + "inputs": ["<(cmd)", "<(input)"], + "outputs": ["<(output_prefix)HKVariantsRev.ocd"], + "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"] + }], + "dependencies": [ + "opencc_dict" + ] + }] +} diff --git a/node/global.gypi 
b/node/global.gypi new file mode 100644 index 0000000..63dd7b0 --- /dev/null +++ b/node/global.gypi @@ -0,0 +1,30 @@ +{ + "variables": { + "opencc_version": "1.0.2" + }, + "target_defaults": { + "defines": [ + "VERSION=\"<(opencc_version)\"" + ], + "conditions": [ + ["OS=='linux'", { + "cflags": [ + "-std=c++0x" + ], + "cflags!": ["-fno-exceptions"], + "cflags_cc!": ["-fno-exceptions"], + }], + ["OS=='mac'", { + 'xcode_settings': { + 'GCC_ENABLE_CPP_EXCEPTIONS': 'YES', + 'MACOSX_DEPLOYMENT_TARGET': '10.7', + 'OTHER_CPLUSPLUSFLAGS': ["-std=c++11", "-stdlib=libc++"], + 'OTHER_LDFLAGS': ["-stdlib=libc++"] + } + }], + ["OS=='win'", { + "defines": ["Opencc_BUILT_AS_STATIC"] + }] + ] + } +} diff --git a/node/node_binding.gypi b/node/node_binding.gypi new file mode 100644 index 0000000..1c7006f --- /dev/null +++ b/node/node_binding.gypi @@ -0,0 +1,26 @@ +{ + "targets": [{ + "target_name": "binding", + "sources": [ + "../node/binding.cc", + "../src/BinaryDict.cpp", + "../src/Config.cpp", + "../src/Conversion.cpp", + "../src/ConversionChain.cpp", + "../src/Converter.cpp", + "../src/DartsDict.cpp", + "../src/Dict.cpp", + "../src/DictEntry.cpp", + "../src/DictGroup.cpp", + "../src/MaxMatchSegmentation.cpp", + "../src/Segmentation.cpp", + "../src/TextDict.cpp", + "../src/UTF8Util.cpp", + ], + "include_dirs": [ + "../src", + "../deps/darts-clone", + "../deps/rapidjson-0.11" + ] + }] +} diff --git a/node/opencc.js b/node/opencc.js index 34ef812..4604b9b 100644 --- a/node/opencc.js +++ b/node/opencc.js @@ -5,7 +5,7 @@ * @license * Open Chinese Convert * - * Copyright 2010-2013 BYVoid + * Copyright 2010-2014 BYVoid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,34 +48,12 @@ var getConfigPath = function (config) { */ var OpenCC = module.exports = function (config) { if (!config) { - config = 'zhs2zht.ini'; + config = 's2t.json'; } config = getConfigPath(config); this.handler = new binding.Opencc(config); }; - -/** - * Default conversion mode. - * - * @ingroup node_api - */ -OpenCC.CONVERSION_FAST = 0; - -/** - * Only converts text into segments. - * - * @ingroup node_api - */ -OpenCC.CONVERSION_SEGMENT_ONLY = 1; - -/** - * List all candidates of every segment. - * - * @ingroup node_api - */ -OpenCC.CONVERSION_LIST_CANDIDATES = 2; - /** * Converts input text. * @@ -101,15 +79,3 @@ OpenCC.prototype.convert = function (input, callback) { OpenCC.prototype.convertSync = function (input) { return this.handler.convertSync(input.toString()); }; - -/** - * Sets conversion mode. - * - * @fn void setConversionMode(int conversionMode) - * @memberof OpenCC - * @param conversionMode Conversion mode. - * @ingroup node_api - */ -OpenCC.prototype.setConversionMode = function (conversionMode) { - return this.handler.setConversionMode(conversionMode); -}; diff --git a/node/opencc_dict.gypi b/node/opencc_dict.gypi new file mode 100644 index 0000000..f9026f3 --- /dev/null +++ b/node/opencc_dict.gypi @@ -0,0 +1,21 @@ +{ + "targets": [{ + "target_name": "opencc_dict", + "type": "executable", + "sources": [ + "../src/BinaryDict.cpp", + "../src/DartsDict.cpp", + "../src/Dict.cpp", + "../src/DictConverter.cpp", + "../src/DictEntry.cpp", + "../src/DictGroup.cpp", + "../src/TextDict.cpp", + "../src/UTF8Util.cpp", + ], + "include_dirs": [ + "../src", + "../deps/darts-clone", + "../deps/tclap-1.2.1" + ] + }] +} diff --git a/node/test.js b/node/test.js index fce41b5..7020486 100644 --- a/node/test.js +++ b/node/test.js @@ -3,36 +3,32 @@ var fs = require('fs'); var OpenCC = require('./opencc'); var configs = [ - 'zhs2zht', - 'zht2zhs', - 'mix2zht', - 'mix2zhs', - 'zhs2zhtw_p', - 'zhs2zhtw_vp', - 'zhtw2zhcn_t', - 'zhtw2zhcn_s', + 's2t', + 's2tw', + 
's2twp', + 't2s', + 'tw2s', + 'tw2sp', + 's2hk', + 'hk2s', ]; var testSync = function (config, done) { var inputName = 'test/testcases/' + config + '.in'; var outputName = 'test/testcases/' + config + '.ans'; - var configName = config + '.ini'; + var configName = config + '.json'; var opencc = new OpenCC(configName); - fs.readFile(inputName, 'utf-8', function (err, text) { - if (err) return done(err); - var converted = opencc.convertSync(text); - fs.readFile(outputName, 'utf-8', function (err, answer) { - if (err) return done(err); - assert.equal(converted, answer); - done(); - }); - }); + var text = fs.readFileSync(inputName, 'utf-8'); + var converted = opencc.convertSync(text); + var answer = fs.readFileSync(outputName, 'utf-8'); + assert.equal(converted, answer); + done(); }; var testAsync = function (config, done) { var inputName = 'test/testcases/' + config + '.in'; var outputName = 'test/testcases/' + config + '.ans'; - var configName = config + '.ini'; + var configName = config + '.json'; var opencc = new OpenCC(configName); fs.readFile(inputName, 'utf-8', function (err, text) { if (err) return done(err); diff --git a/opencc.gyp b/opencc.gyp deleted file mode 100644 index 5fab4b9..0000000 --- a/opencc.gyp +++ /dev/null @@ -1,39 +0,0 @@ -{ - "includes": [ - "gypi/global.gypi", - "gypi/configs.gypi", - "gypi/dicts.gypi", - ], - "targets": [{ - "target_name": "libopencc", - "type": "<(library)", - "sources": [ - "src/config_reader.c", - "src/converter.c", - "src/dict_group.c", - "src/dict_chain.c", - "src/encoding.c", - "src/utils.c", - "src/opencc.c", - "src/dict.c", - "src/dictionary/datrie.c", - "src/dictionary/text.c" - ], - "conditions": [ - ["OS=='linux'", { - "cflags": [ - "-fPIC" - ] - }] - ] - }, { - "target_name": "opencc", - "type": "executable", - "sources": [ - "src/tools/opencc.c" - ], - "dependencies": [ - "libopencc" - ] - }] -} diff --git a/package.json b/package.json index f00097b..90bf935 100644 --- a/package.json +++ b/package.json @@ -1,7 
+1,7 @@ { "name": "opencc", - "version": "0.4.3", - "description": "A project for conversion between Traditional and Simplified Chinese", + "version": "1.0.2", + "description": "Conversion between Traditional and Simplified Chinese", "author": "BYVoid ", "license": "Apache", "main": "node/opencc.js", @@ -13,12 +13,13 @@ "url": "git://github.com/BYVoid/OpenCC.git" }, "bugs": { - "url": "https://code.google.com/p/opencc/issues/entry" + "url": "https://github.com/BYVoid/Opencc/issues" }, "keywords": [ "opencc", "Chinese", "conversion", + "unicode", "Simplified Chinese", "Traditional Chinese" ], diff --git a/po/CMakeLists.txt b/po/CMakeLists.txt deleted file mode 100644 index 2661fd2..0000000 --- a/po/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -file(STRINGS LINGUAS LANGUAGES) -separate_arguments(LANGUAGES) -set(DOMAIN ${PACKAGE_NAME}) - -foreach(LANG ${LANGUAGES}) - - add_custom_target( - ${LANG}_mo - ALL - DEPENDS - ${LANG}.mo - ) - - add_custom_command( - OUTPUT ${LANG}.mo - COMMAND ${GETTEXT_MSGFMT_EXECUTABLE} - ${GETTEXT_MSGFMT_PARAMETER} - -o ${LANG}.mo ${CMAKE_SOURCE_DIR}/po/${LANG}.po - DEPENDS - ${LANG}.po - COMMENT "mo-update [${LANG}]: Creating mo file." 
- ) - - install( - FILES - ${CMAKE_BINARY_DIR}/po/${LANG}.mo - RENAME - ${DOMAIN}.mo - DESTINATION - ${DIR_SHARE_LOCALE}/${LANG}/LC_MESSAGES - ) - -endforeach(LANG ${LANGUAGES}) \ No newline at end of file diff --git a/po/LINGUAS b/po/LINGUAS deleted file mode 100644 index cb377b9..0000000 --- a/po/LINGUAS +++ /dev/null @@ -1,3 +0,0 @@ -zh_CN -zh_HK -zh_TW diff --git a/po/POTFILES.in b/po/POTFILES.in deleted file mode 100644 index 731fcc0..0000000 --- a/po/POTFILES.in +++ /dev/null @@ -1,24 +0,0 @@ -src/config_reader.c -src/config_reader.h -src/converter.c -src/converter.h -src/dict_group.c -src/dict_group.h -src/dict_chain.c -src/dict_chain.h -src/encoding.c -src/encoding.h -src/opencc.c -src/opencc.h -src/opencc_types.h -src/utils.c -src/utils.h -src/wrapper/cplusplus/openccxx.h -src/dict.c -src/dict.h -src/dictionary/datrie.c -src/dictionary/datrie.h -src/dictionary/text.c -src/dictionary/text.h -src/tools/opencc.c -src/tools/opencc_dict.c diff --git a/po/update.sh b/po/update.sh deleted file mode 100755 index b4e97ee..0000000 --- a/po/update.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -xgettext \ - --default-domain="opencc" \ - --directory=".." \ - --force-po \ - --add-comments="TRANSLATORS:" \ - --keyword=_ --keyword=N_ \ - --files-from="POTFILES.in" \ - --copyright-holder="BYVoid " \ - --msgid-bugs-address="http://code.google.com/p/open-chinese-convert/issues/entry" \ - --from-code=UTF-8 \ - --sort-by-file \ - --output=opencc.pot - -for LANG in `cat LINGUAS` -do - echo -n $LANG - msgmerge \ - --backup=none \ - --update $LANG.po \ - opencc.pot -done - -rm opencc.pot \ No newline at end of file diff --git a/po/zh_CN.po b/po/zh_CN.po deleted file mode 100644 index e84bc16..0000000 --- a/po/zh_CN.po +++ /dev/null @@ -1,252 +0,0 @@ -# Chinese translations for opencc package. -# Copyright (C) 2010 BYVoid -# This file is distributed under the same license as the opencc package. -# -# BYVoid , 2010. 
-msgid "" -msgstr "" -"Project-Id-Version: opencc 0.1.2\n" -"Report-Msgid-Bugs-To: http://code.google.com/p/open-chinese-convert/issues/" -"entry\n" -"POT-Creation-Date: 2010-09-17 08:39+0800\n" -"PO-Revision-Date: 2010-09-17 08:48+0800\n" -"Last-Translator: \n" -"Language-Team: American English \n" -"Language: zh_CN\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"X-Generator: Lokalize 1.0\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" - -#: src/config_reader.c:275 -msgid "Can not access configuration file" -msgstr "无法访问配置文件" - -#: src/config_reader.c:278 -msgid "Configuration file parse error" -msgstr "配置文件解析错误" - -#: src/config_reader.c:281 -msgid "Invalid property" -msgstr "无效属性" - -#: src/config_reader.c:284 -msgid "Invalid dictionary type" -msgstr "无效的辞典类型" - -#: src/config_reader.c:287 src/converter.c:747 src/dict_group.c:218 -#: src/opencc.c:271 -msgid "Unknown" -msgstr "未知" - -#: src/converter.c:741 src/dict_group.c:206 -msgid "No dictionary loaded" -msgstr "没有辞典加载" - -#: src/converter.c:744 -msgid "Output buffer not enough for one segment" -msgstr "输出缓冲区不足以存储一个分词" - -#: src/dict_group.c:209 -msgid "Can not open dictionary file" -msgstr "无法打开辞典" - -#: src/dict_group.c:212 -msgid "Invalid dictionary file" -msgstr "辞典格式无效" - -#: src/dict_group.c:215 -msgid "Invalid dictionary index" -msgstr "辞典索引无效" - -#: src/opencc.c:262 -msgid "Dictionary loading error" -msgstr "辞典读取错误" - -#: src/opencc.c:265 -msgid "Configuration error" -msgstr "配置错误" - -#: src/opencc.c:268 -msgid "Converter error" -msgstr "转换器错误" - -#: src/tools/opencc.c:39 -msgid "OpenCC initialization error" -msgstr "OpenCC初始化错误" - -#: src/tools/opencc.c:51 -#, c-format -msgid "Can not read file: %s\n" -msgstr "无法读取文件:%s\n" - -#: src/tools/opencc.c:61 src/tools/opencc_dict.c:296 -#, c-format -msgid "Can not write file: %s\n" -msgstr "无法写入文件: %s\n" - -#: src/tools/opencc.c:72 -msgid "OpenCC error" -msgstr "OpenCC 错误" - -#: 
src/tools/opencc.c:86 src/tools/opencc.c:89 src/tools/opencc.c:92 -#: src/tools/opencc.c:100 src/tools/opencc.c:107 src/tools/opencc.c:110 -#: src/tools/opencc_dict.c:246 src/tools/opencc_dict.c:254 -#: src/tools/opencc_dict.c:385 src/tools/opencc_dict.c:386 -#, c-format -msgid "\n" -msgstr "\n" - -#: src/tools/opencc.c:87 -#, c-format -msgid "Open Chinese Convert (OpenCC) Command Line Tool\n" -msgstr "Open Chinese Convert (OpenCC) 命令行工具\n" - -#: src/tools/opencc.c:88 -#, c-format -msgid "Version %s\n" -msgstr "版本 %s\n" - -#: src/tools/opencc.c:90 -#, c-format -msgid "Author: %s\n" -msgstr "作者: %s\n" - -#: src/tools/opencc.c:91 -#, c-format -msgid "Bug Report: %s\n" -msgstr "Bug汇报: %s\n" - -#: src/tools/opencc.c:98 src/tools/opencc_dict.c:379 -#, c-format -msgid "Usage:\n" -msgstr "使用方法:\n" - -#: src/tools/opencc.c:99 -#, c-format -msgid " opencc [Options]\n" -msgstr " opencc [参数]\n" - -#: src/tools/opencc.c:101 -#, c-format -msgid "Options:\n" -msgstr "参数:\n" - -#: src/tools/opencc.c:102 -#, c-format -msgid " -i [file], --input=[file] Read original text from [file].\n" -msgstr " -i [file], --input=[file] 从 [file] 读取原始文本。\n" - -#: src/tools/opencc.c:103 -#, c-format -msgid " -o [file], --output=[file] Write converted text to [file].\n" -msgstr " -o [file], --output=[file] 将转换后的文本写入 [file].\n" - -#: src/tools/opencc.c:104 -#, c-format -msgid "" -" -c [file], --config=[file] Load configuration of conversion from [file].\n" -msgstr " -c [file], --config=[file] 从 [file] 中读取配置。\n" - -#: src/tools/opencc.c:105 -#, c-format -msgid " -v, --version Print version and build information.\n" -msgstr " -v, --version 显示版本和生成信息。\n" - -#: src/tools/opencc.c:106 -#, c-format -msgid " -h, --help Print this help.\n" -msgstr " -h, --help 显示此帮助。\n" - -#: src/tools/opencc.c:108 -#, c-format -msgid "" -"With no input file, reads standard input and writes converted stream to " -"standard output.\n" -msgstr "如果没有设置输入文件,将会从标准输入中读取数据,并输出到标准输出。\n" - -#: src/tools/opencc.c:109 -#, c-format 
-msgid "Default configuration(%s) will be loaded if not set.\n" -msgstr "如果没有设置config file,则会读取默认配置文件(%s)。\n" - -#: src/tools/opencc.c:144 -#, c-format -msgid "Please use %s --help.\n" -msgstr "请使用%s --help以获得帮助。\n" - -#: src/tools/opencc_dict.c:373 -#, c-format -msgid "" -"\n" -"Open Chinese Convert (OpenCC) Dictionary Tool\n" -"Version %s\n" -"\n" -msgstr "" -"\n" -"Open Chinese Convert (OpenCC) 辞典工具\n" -"版本 %s\n" -"\n" - -#: src/tools/opencc_dict.c:380 -#, c-format -msgid "" -" opencc_dict -i input_file -o output_file\n" -"\n" -msgstr "" -" opencc_dict -i input_file -o output_file\n" -"\n" - -#: src/tools/opencc_dict.c:381 -#, c-format -msgid " -i input_file\n" -msgstr " -i input_file\n" - -#: src/tools/opencc_dict.c:382 -#, c-format -msgid " Read data from input_file.\n" -msgstr " 从input_file读取数据。\n" - -#: src/tools/opencc_dict.c:383 -#, c-format -msgid " -o output_file\n" -msgstr " -o output_file\n" - -#: src/tools/opencc_dict.c:384 -#, c-format -msgid " Write converted data to output_file.\n" -msgstr " 将生成的辞典写入output_file。\n" - -#: src/tools/opencc_dict.c:432 -#, c-format -msgid "Please specify input file using -i.\n" -msgstr "请使用-i指定输入文件。\n" - -#: src/tools/opencc_dict.c:439 -#, c-format -msgid "Please specify output file using -o.\n" -msgstr "请使用-o指定输入文件。\n" - -#~ msgid "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" -#~ msgstr "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" - -#~ msgid " -c config_file\n" -#~ msgstr " -c config_file\n" - -#~ msgid " Load dictionary configuration from config_file.\n" -#~ msgstr " 从config_file读取配置。\n" - -#~ msgid " Note:\n" -#~ msgstr " 注释:\n" - -#~ msgid "" -#~ " Text from standard input will be read if input_file is not set\n" -#~ " and will be written to standard output if output_file is not set.\n" -#~ msgstr "" -#~ " 如果没有设置input_file,将会从标准输入读取文本。如果\n" -#~ " 没有设置output_file,将会把转换后文本写入到标准输出。\n" diff --git a/po/zh_HK.po b/po/zh_HK.po deleted file mode 100644 index 
d66bb14..0000000 --- a/po/zh_HK.po +++ /dev/null @@ -1,252 +0,0 @@ -# Chinese translations for opencc package. -# Copyright (C) 2010 BYVoid -# This file is distributed under the same license as the opencc package. -# -# BYVoid , 2010. -msgid "" -msgstr "" -"Project-Id-Version: opencc 0.1.2\n" -"Report-Msgid-Bugs-To: http://code.google.com/p/open-chinese-convert/issues/" -"entry\n" -"POT-Creation-Date: 2010-09-17 08:39+0800\n" -"PO-Revision-Date: 2010-09-17 08:48+0800\n" -"Last-Translator: \n" -"Language-Team: American English \n" -"Language: zh_HK\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"X-Generator: Lokalize 1.0\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" - -#: src/config_reader.c:275 -msgid "Can not access configuration file" -msgstr "無法訪問配置文件" - -#: src/config_reader.c:278 -msgid "Configuration file parse error" -msgstr "配置文件解析錯誤" - -#: src/config_reader.c:281 -msgid "Invalid property" -msgstr "無效屬性" - -#: src/config_reader.c:284 -msgid "Invalid dictionary type" -msgstr "無效的辭典類型" - -#: src/config_reader.c:287 src/converter.c:747 src/dict_group.c:218 -#: src/opencc.c:271 -msgid "Unknown" -msgstr "未知" - -#: src/converter.c:741 src/dict_group.c:206 -msgid "No dictionary loaded" -msgstr "沒有辭典加載" - -#: src/converter.c:744 -msgid "Output buffer not enough for one segment" -msgstr "輸出緩衝區不足以存儲一個分詞" - -#: src/dict_group.c:209 -msgid "Can not open dictionary file" -msgstr "無法打開辭典" - -#: src/dict_group.c:212 -msgid "Invalid dictionary file" -msgstr "辭典格式無效" - -#: src/dict_group.c:215 -msgid "Invalid dictionary index" -msgstr "辭典索引無效" - -#: src/opencc.c:262 -msgid "Dictionary loading error" -msgstr "辭典讀取錯誤" - -#: src/opencc.c:265 -msgid "Configuration error" -msgstr "配置錯誤" - -#: src/opencc.c:268 -msgid "Converter error" -msgstr "轉換器錯誤" - -#: src/tools/opencc.c:39 -msgid "OpenCC initialization error" -msgstr "OpenCC初始化錯誤" - -#: src/tools/opencc.c:51 -#, c-format -msgid "Can not read file: %s\n" 
-msgstr "無法讀取文件:%s\n" - -#: src/tools/opencc.c:61 src/tools/opencc_dict.c:296 -#, c-format -msgid "Can not write file: %s\n" -msgstr "無法寫入文件: %s\n" - -#: src/tools/opencc.c:72 -msgid "OpenCC error" -msgstr "OpenCC 錯誤" - -#: src/tools/opencc.c:86 src/tools/opencc.c:89 src/tools/opencc.c:92 -#: src/tools/opencc.c:100 src/tools/opencc.c:107 src/tools/opencc.c:110 -#: src/tools/opencc_dict.c:246 src/tools/opencc_dict.c:254 -#: src/tools/opencc_dict.c:385 src/tools/opencc_dict.c:386 -#, c-format -msgid "\n" -msgstr "\n" - -#: src/tools/opencc.c:87 -#, c-format -msgid "Open Chinese Convert (OpenCC) Command Line Tool\n" -msgstr "Open Chinese Convert (OpenCC) 命令行工具\n" - -#: src/tools/opencc.c:88 -#, c-format -msgid "Version %s\n" -msgstr "版本 %s\n" - -#: src/tools/opencc.c:90 -#, c-format -msgid "Author: %s\n" -msgstr "作者: %s\n" - -#: src/tools/opencc.c:91 -#, c-format -msgid "Bug Report: %s\n" -msgstr "Bug彙報: %s\n" - -#: src/tools/opencc.c:98 src/tools/opencc_dict.c:379 -#, c-format -msgid "Usage:\n" -msgstr "使用方法:\n" - -#: src/tools/opencc.c:99 -#, c-format -msgid " opencc [Options]\n" -msgstr " opencc [參數]\n" - -#: src/tools/opencc.c:101 -#, c-format -msgid "Options:\n" -msgstr "參數:\n" - -#: src/tools/opencc.c:102 -#, c-format -msgid " -i [file], --input=[file] Read original text from [file].\n" -msgstr " -i [file], --input=[file] 從 [file] 讀取原始文本。\n" - -#: src/tools/opencc.c:103 -#, c-format -msgid " -o [file], --output=[file] Write converted text to [file].\n" -msgstr " -o [file], --output=[file] 將轉換後的文本寫入 [file].\n" - -#: src/tools/opencc.c:104 -#, c-format -msgid "" -" -c [file], --config=[file] Load configuration of conversion from [file].\n" -msgstr " -c [file], --config=[file] 從 [file] 中讀取配置。\n" - -#: src/tools/opencc.c:105 -#, c-format -msgid " -v, --version Print version and build information.\n" -msgstr " -v, --version 顯示版本和生成信息。\n" - -#: src/tools/opencc.c:106 -#, c-format -msgid " -h, --help Print this help.\n" -msgstr " -h, --help 顯示此幫助。\n" - -#: 
src/tools/opencc.c:108 -#, c-format -msgid "" -"With no input file, reads standard input and writes converted stream to " -"standard output.\n" -msgstr "如果沒有設置輸入文件,將會從標準輸入中讀取數據,並輸出到標準輸出。\n" - -#: src/tools/opencc.c:109 -#, c-format -msgid "Default configuration(%s) will be loaded if not set.\n" -msgstr "如果沒有設置config file,則會讀取默認配置文件(%s)。\n" - -#: src/tools/opencc.c:144 -#, c-format -msgid "Please use %s --help.\n" -msgstr "請使用%s --help以獲得幫助。\n" - -#: src/tools/opencc_dict.c:373 -#, c-format -msgid "" -"\n" -"Open Chinese Convert (OpenCC) Dictionary Tool\n" -"Version %s\n" -"\n" -msgstr "" -"\n" -"Open Chinese Convert (OpenCC) 辭典工具\n" -"版本 %s\n" -"\n" - -#: src/tools/opencc_dict.c:380 -#, c-format -msgid "" -" opencc_dict -i input_file -o output_file\n" -"\n" -msgstr "" -" opencc_dict -i input_file -o output_file\n" -"\n" - -#: src/tools/opencc_dict.c:381 -#, c-format -msgid " -i input_file\n" -msgstr " -i input_file\n" - -#: src/tools/opencc_dict.c:382 -#, c-format -msgid " Read data from input_file.\n" -msgstr " 從input_file讀取數據。\n" - -#: src/tools/opencc_dict.c:383 -#, c-format -msgid " -o output_file\n" -msgstr " -o output_file\n" - -#: src/tools/opencc_dict.c:384 -#, c-format -msgid " Write converted data to output_file.\n" -msgstr " 將生成的辭典寫入output_file。\n" - -#: src/tools/opencc_dict.c:432 -#, c-format -msgid "Please specify input file using -i.\n" -msgstr "請使用-i指定輸入文件。\n" - -#: src/tools/opencc_dict.c:439 -#, c-format -msgid "Please specify output file using -o.\n" -msgstr "請使用-o指定輸入文件。\n" - -#~ msgid "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" -#~ msgstr "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" - -#~ msgid " -c config_file\n" -#~ msgstr " -c config_file\n" - -#~ msgid " Load dictionary configuration from config_file.\n" -#~ msgstr " 從config_file讀取配置。\n" - -#~ msgid " Note:\n" -#~ msgstr " 註釋:\n" - -#~ msgid "" -#~ " Text from standard input will be read if input_file is not set\n" -#~ " and 
will be written to standard output if output_file is not set.\n" -#~ msgstr "" -#~ " 如果沒有設置input_file,將會從標準輸入讀取文本。如果\n" -#~ " 沒有設置output_file,將會把轉換後文本寫入到標準輸出。\n" diff --git a/po/zh_TW.po b/po/zh_TW.po deleted file mode 100644 index 9ad2dcd..0000000 --- a/po/zh_TW.po +++ /dev/null @@ -1,252 +0,0 @@ -# Chinese translations for opencc package. -# Copyright (C) 2010 BYVoid -# This file is distributed under the same license as the opencc package. -# -# BYVoid , 2010. -msgid "" -msgstr "" -"Project-Id-Version: opencc 0.1.2\n" -"Report-Msgid-Bugs-To: http://code.google.com/p/open-chinese-convert/issues/" -"entry\n" -"POT-Creation-Date: 2010-09-17 08:39+0800\n" -"PO-Revision-Date: 2010-09-17 08:48+0800\n" -"Last-Translator: \n" -"Language-Team: American English \n" -"Language: zh_TW\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"X-Generator: Lokalize 1.0\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" - -#: src/config_reader.c:275 -msgid "Can not access configuration file" -msgstr "無法訪問配置文件" - -#: src/config_reader.c:278 -msgid "Configuration file parse error" -msgstr "配置文件解析錯誤" - -#: src/config_reader.c:281 -msgid "Invalid property" -msgstr "無效屬性" - -#: src/config_reader.c:284 -msgid "Invalid dictionary type" -msgstr "無效的辭典類型" - -#: src/config_reader.c:287 src/converter.c:747 src/dict_group.c:218 -#: src/opencc.c:271 -msgid "Unknown" -msgstr "未知" - -#: src/converter.c:741 src/dict_group.c:206 -msgid "No dictionary loaded" -msgstr "沒有辭典加載" - -#: src/converter.c:744 -msgid "Output buffer not enough for one segment" -msgstr "輸出緩衝區不足以存儲一個分詞" - -#: src/dict_group.c:209 -msgid "Can not open dictionary file" -msgstr "無法打開辭典" - -#: src/dict_group.c:212 -msgid "Invalid dictionary file" -msgstr "辭典格式無效" - -#: src/dict_group.c:215 -msgid "Invalid dictionary index" -msgstr "辭典索引無效" - -#: src/opencc.c:262 -msgid "Dictionary loading error" -msgstr "辭典讀取錯誤" - -#: src/opencc.c:265 -msgid "Configuration error" -msgstr 
"配置錯誤" - -#: src/opencc.c:268 -msgid "Converter error" -msgstr "轉換器錯誤" - -#: src/tools/opencc.c:39 -msgid "OpenCC initialization error" -msgstr "OpenCC初始化錯誤" - -#: src/tools/opencc.c:51 -#, c-format -msgid "Can not read file: %s\n" -msgstr "無法讀取文件:%s\n" - -#: src/tools/opencc.c:61 src/tools/opencc_dict.c:296 -#, c-format -msgid "Can not write file: %s\n" -msgstr "無法寫入文件: %s\n" - -#: src/tools/opencc.c:72 -msgid "OpenCC error" -msgstr "OpenCC 錯誤" - -#: src/tools/opencc.c:86 src/tools/opencc.c:89 src/tools/opencc.c:92 -#: src/tools/opencc.c:100 src/tools/opencc.c:107 src/tools/opencc.c:110 -#: src/tools/opencc_dict.c:246 src/tools/opencc_dict.c:254 -#: src/tools/opencc_dict.c:385 src/tools/opencc_dict.c:386 -#, c-format -msgid "\n" -msgstr "\n" - -#: src/tools/opencc.c:87 -#, c-format -msgid "Open Chinese Convert (OpenCC) Command Line Tool\n" -msgstr "Open Chinese Convert (OpenCC) 命令行工具\n" - -#: src/tools/opencc.c:88 -#, c-format -msgid "Version %s\n" -msgstr "版本 %s\n" - -#: src/tools/opencc.c:90 -#, c-format -msgid "Author: %s\n" -msgstr "作者: %s\n" - -#: src/tools/opencc.c:91 -#, c-format -msgid "Bug Report: %s\n" -msgstr "Bug彙報: %s\n" - -#: src/tools/opencc.c:98 src/tools/opencc_dict.c:379 -#, c-format -msgid "Usage:\n" -msgstr "使用方法:\n" - -#: src/tools/opencc.c:99 -#, c-format -msgid " opencc [Options]\n" -msgstr " opencc [參數]\n" - -#: src/tools/opencc.c:101 -#, c-format -msgid "Options:\n" -msgstr "參數:\n" - -#: src/tools/opencc.c:102 -#, c-format -msgid " -i [file], --input=[file] Read original text from [file].\n" -msgstr " -i [file], --input=[file] 從 [file] 讀取原始文本。\n" - -#: src/tools/opencc.c:103 -#, c-format -msgid " -o [file], --output=[file] Write converted text to [file].\n" -msgstr " -o [file], --output=[file] 將轉換後的文本寫入 [file].\n" - -#: src/tools/opencc.c:104 -#, c-format -msgid "" -" -c [file], --config=[file] Load configuration of conversion from [file].\n" -msgstr " -c [file], --config=[file] 從 [file] 中讀取配置。\n" - -#: src/tools/opencc.c:105 -#, c-format 
-msgid " -v, --version Print version and build information.\n" -msgstr " -v, --version 顯示版本和生成信息。\n" - -#: src/tools/opencc.c:106 -#, c-format -msgid " -h, --help Print this help.\n" -msgstr " -h, --help 顯示此幫助。\n" - -#: src/tools/opencc.c:108 -#, c-format -msgid "" -"With no input file, reads standard input and writes converted stream to " -"standard output.\n" -msgstr "如果沒有設置輸入文件,將會從標準輸入中讀取數據,並輸出到標準輸出。\n" - -#: src/tools/opencc.c:109 -#, c-format -msgid "Default configuration(%s) will be loaded if not set.\n" -msgstr "如果沒有設置config file,則會讀取默認配置文件(%s)。\n" - -#: src/tools/opencc.c:144 -#, c-format -msgid "Please use %s --help.\n" -msgstr "請使用%s --help以獲得幫助。\n" - -#: src/tools/opencc_dict.c:373 -#, c-format -msgid "" -"\n" -"Open Chinese Convert (OpenCC) Dictionary Tool\n" -"Version %s\n" -"\n" -msgstr "" -"\n" -"Open Chinese Convert (OpenCC) 辭典工具\n" -"版本 %s\n" -"\n" - -#: src/tools/opencc_dict.c:380 -#, c-format -msgid "" -" opencc_dict -i input_file -o output_file\n" -"\n" -msgstr "" -" opencc_dict -i input_file -o output_file\n" -"\n" - -#: src/tools/opencc_dict.c:381 -#, c-format -msgid " -i input_file\n" -msgstr " -i input_file\n" - -#: src/tools/opencc_dict.c:382 -#, c-format -msgid " Read data from input_file.\n" -msgstr " 從input_file讀取數據。\n" - -#: src/tools/opencc_dict.c:383 -#, c-format -msgid " -o output_file\n" -msgstr " -o output_file\n" - -#: src/tools/opencc_dict.c:384 -#, c-format -msgid " Write converted data to output_file.\n" -msgstr " 將生成的辭典寫入output_file。\n" - -#: src/tools/opencc_dict.c:432 -#, c-format -msgid "Please specify input file using -i.\n" -msgstr "請使用-i指定輸入文件。\n" - -#: src/tools/opencc_dict.c:439 -#, c-format -msgid "Please specify output file using -o.\n" -msgstr "請使用-o指定輸入文件。\n" - -#~ msgid "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" -#~ msgstr "" -#~ " opencc [-i input_file] [-o output_file] [-c config_file]\n" -#~ "\n" - -#~ msgid " -c config_file\n" -#~ msgstr " -c config_file\n" - -#~ msgid " Load 
dictionary configuration from config_file.\n" -#~ msgstr " 從config_file讀取配置。\n" - -#~ msgid " Note:\n" -#~ msgstr " 註釋:\n" - -#~ msgid "" -#~ " Text from standard input will be read if input_file is not set\n" -#~ " and will be written to standard output if output_file is not set.\n" -#~ msgstr "" -#~ " 如果沒有設置input_file,將會從標準輸入讀取文本。如果\n" -#~ " 沒有設置output_file,將會把轉換後文本寫入到標準輸出。\n" diff --git a/release.sh b/release.sh deleted file mode 100755 index 34ddf33..0000000 --- a/release.sh +++ /dev/null @@ -1,11 +0,0 @@ -mkdir -p release \ -&& cd release \ -&& cmake \ - -D ENABLE_GETTEXT:BOOL=ON \ - -D BUILD_DOCUMENTATION:BOOL=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/usr \ - .. \ -&& make \ -&& make test \ -&& make package_source diff --git a/src/BinaryDict.cpp b/src/BinaryDict.cpp new file mode 100644 index 0000000..1921b7f --- /dev/null +++ b/src/BinaryDict.cpp @@ -0,0 +1,181 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BinaryDict.hpp" +#include "Lexicon.hpp" + +using namespace opencc; + +size_t BinaryDict::KeyMaxLength() const { + size_t maxLength = 0; + for (const DictEntry* entry : *lexicon) { + maxLength = std::max(maxLength, entry->KeyLength()); + } + return maxLength; +} + +void BinaryDict::SerializeToFile(FILE* fp) const { + string keyBuffer, valueBuffer; + vector keyOffsets, valueOffsets; + size_t keyTotalLength = 0, valueTotalLength = 0; + ConstructBuffer(keyBuffer, keyOffsets, keyTotalLength, + valueBuffer, valueOffsets, valueTotalLength); + // Number of items + size_t numItems = lexicon->Length(); + fwrite(&numItems, sizeof(size_t), 1, fp); + + // Data + fwrite(&keyTotalLength, sizeof(size_t), 1, fp); + fwrite(keyBuffer.c_str(), sizeof(char), keyTotalLength, fp); + fwrite(&valueTotalLength, sizeof(size_t), 1, fp); + fwrite(valueBuffer.c_str(), sizeof(char), valueTotalLength, fp); + + size_t keyCursor = 0, valueCursor = 0; + for (const DictEntry* entry : *lexicon) { + // Number of values + size_t numValues = entry->NumValues(); + fwrite(&numValues, sizeof(size_t), 1, fp); + // Key offset + size_t keyOffset = keyOffsets[keyCursor++]; + fwrite(&keyOffset, sizeof(size_t), 1, fp); + // Values offset + for (size_t i = 0; i < numValues; i++) { + size_t valueOffset = valueOffsets[valueCursor++]; + fwrite(&valueOffset, sizeof(size_t), 1, fp); + } + } + assert(keyCursor == numItems); +} + +BinaryDictPtr BinaryDict::NewFromFile(FILE* fp) { + BinaryDictPtr dict(new BinaryDict(LexiconPtr(new Lexicon))); + + // Number of items + size_t numItems; + size_t unitsRead = fread(&numItems, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (numItems)"); + } + + // Keys + size_t keyTotalLength; + unitsRead = fread(&keyTotalLength, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (keyTotalLength)"); + } + dict->keyBuffer.resize(keyTotalLength); + unitsRead = 
fread(const_cast(dict->keyBuffer.c_str()), + sizeof(char), keyTotalLength, fp); + if (unitsRead != keyTotalLength) { + throw InvalidFormat("Invalid OpenCC binary dictionary (keyBuffer)"); + } + + // Values + size_t valueTotalLength; + unitsRead = fread(&valueTotalLength, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (valueTotalLength)"); + } + dict->valueBuffer.resize(valueTotalLength); + unitsRead = fread(const_cast(dict->valueBuffer.c_str()), + sizeof(char), valueTotalLength, fp); + if (unitsRead != valueTotalLength) { + throw InvalidFormat("Invalid OpenCC binary dictionary (valueBuffer)"); + } + + // Offsets + for (size_t i = 0; i < numItems; i++) { + // Number of values + size_t numValues; + unitsRead = fread(&numValues, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (numValues)"); + } + // Key offset + size_t keyOffset; + unitsRead = fread(&keyOffset, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (keyOffset)"); + } + const char* key = dict->keyBuffer.c_str() + keyOffset; + // Value offset + vector values; + for (size_t j = 0; j < numValues; j++) { + size_t valueOffset; + unitsRead = fread(&valueOffset, sizeof(size_t), 1, fp); + if (unitsRead != 1) { + throw InvalidFormat("Invalid OpenCC binary dictionary (valueOffset)"); + } + const char* value = dict->valueBuffer.c_str() + valueOffset; + values.push_back(value); + } + PtrDictEntry* entry = new PtrDictEntry(key, values); + dict->lexicon->Add(entry); + } + + return dict; +} + +void BinaryDict::ConstructBuffer(string& keyBuffer, + vector& keyOffset, + size_t& keyTotalLength, + string& valueBuffer, + vector& valueOffset, + size_t& valueTotalLength) const { + keyTotalLength = 0; + valueTotalLength = 0; + // Calculate total length + for (const DictEntry* entry : *lexicon) { + keyTotalLength += entry->KeyLength() + 1; + assert(entry->NumValues() 
!= 0); + if (entry->NumValues() == 1) { + const auto* svEntry = static_cast(entry); + valueTotalLength += strlen(svEntry->Value()) + 1; + } else { + const auto* mvEntry = static_cast(entry); + for (const auto& value : mvEntry->Values()) { + valueTotalLength += strlen(value) + 1; + } + } + } + // Write keys and values to buffers + keyBuffer.resize(keyTotalLength, '\0'); + valueBuffer.resize(valueTotalLength, '\0'); + char* pKeyBuffer = const_cast(keyBuffer.c_str()); + char* pValueBuffer = const_cast(valueBuffer.c_str()); + for (const DictEntry* entry : *lexicon) { + strcpy(pKeyBuffer, entry->Key()); + keyOffset.push_back(pKeyBuffer - keyBuffer.c_str()); + pKeyBuffer += entry->KeyLength() + 1; + if (entry->NumValues() == 1) { + const auto* svEntry = static_cast(entry); + strcpy(pValueBuffer, svEntry->Value()); + valueOffset.push_back(pValueBuffer - valueBuffer.c_str()); + pValueBuffer += strlen(svEntry->Value()) + 1; + } else { + const auto* mvEntry = static_cast(entry); + for (const auto& value : mvEntry->Values()) { + strcpy(pValueBuffer, value); + valueOffset.push_back(pValueBuffer - valueBuffer.c_str()); + pValueBuffer += strlen(value) + 1; + } + } + } + assert(keyBuffer.c_str() + keyTotalLength == pKeyBuffer); + assert(valueBuffer.c_str() + valueTotalLength == pValueBuffer); +} diff --git a/src/BinaryDict.hpp b/src/BinaryDict.hpp new file mode 100644 index 0000000..eca601d --- /dev/null +++ b/src/BinaryDict.hpp @@ -0,0 +1,61 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "SerializableDict.hpp" + +namespace opencc { +/** +* Binary dictionary for faster deserialization +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT BinaryDict : public SerializableDict { +public: + BinaryDict(const LexiconPtr& _lexicon) : + lexicon(_lexicon) { + } + + virtual ~BinaryDict() { + } + + virtual void SerializeToFile(FILE* fp) const; + + static BinaryDictPtr NewFromFile(FILE* fp); + + const LexiconPtr& GetLexicon() const { + return lexicon; + } + + size_t KeyMaxLength() const; + +private: + LexiconPtr lexicon; + string keyBuffer; + string valueBuffer; + + void ConstructBuffer(string& keyBuffer, + vector& keyOffset, + size_t& keyTotalLength, + string& valueBuffer, + vector& valueOffset, + size_t& valueTotalLength) const; +}; + +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3d3606b..84868cd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,122 +1,120 @@ -set( - LIBOPENCC_HEADERS - opencc.h - opencc_types.h - wrapper/cplusplus/openccxx.h -) +include (GenerateExportHeader) set( - LIBOPENCC_DICTIONARY_SOURCES - dict.c - dictionary/datrie.c - dictionary/text.c - dict.h - dictionary/datrie.h - dictionary/text.h + LIBOPENCC_HEADERS + BinaryDict.hpp + Common.hpp + Config.hpp + Conversion.hpp + ConversionChain.hpp + Converter.hpp + DartsDict.hpp + Dict.hpp + DictEntry.hpp + DictGroup.hpp + Exception.hpp + Export.hpp + Lexicon.hpp + MaxMatchSegmentation.hpp + Optional.hpp + Segmentation.hpp + Segments.hpp + SerializableDict.hpp + TextDict.hpp + UTF8Util.hpp + opencc.h ) set( - LIBOPENCC_SOURCES - ${LIBOPENCC_DICTIONARY_SOURCES} - config_reader.c - converter.c - dict_group.c - dict_chain.c - encoding.c - utils.c - opencc.c - config_reader.h - converter.h - dict_group.h - dict_chain.h - encoding.h - utils.h + LIBOPENCC_SOURCES + BinaryDict.cpp + Config.cpp + Conversion.cpp + 
ConversionChain.cpp + Converter.cpp + DartsDict.cpp + Dict.cpp + DictEntry.cpp + DictGroup.cpp + MaxMatchSegmentation.cpp + SimpleConverter.cpp + Segmentation.cpp + TextDict.cpp + UTF8Util.cpp ) -set (LIBOPENCC_TARGET libopencc) -set (LIBOPENCC_STATIC_TARGET libopencc_static) - -add_definitions( - -DPKGDATADIR="${DIR_SHARE_OPENCC}" - -DLOCALEDIR="${DIR_SHARE_LOCALE}" - -DVERSION="${OPENCC_VERSION}" - -DBYTEORDER=${BYTEORDER} - -DPACKAGE_NAME="${PACKAGE_NAME}" - -Wall -) +include_directories(../deps/darts-clone) +include_directories(../deps/rapidjson-0.11) +include_directories(../deps/tclap-1.2.1) add_library( - ${LIBOPENCC_TARGET} - SHARED - ${LIBOPENCC_SOURCES} + libopencc + ${LIBOPENCC_SOURCES} ) -add_library( - ${LIBOPENCC_STATIC_TARGET} - STATIC - ${LIBOPENCC_SOURCES} +GENERATE_EXPORT_HEADER( + libopencc + BASE_NAME OPENCC + EXPORT_MACRO_NAME OPENCC_EXPORT + EXPORT_FILE_NAME Opencc_Export.h + STATIC_DEFINE Opencc_BUILT_AS_STATIC ) set_target_properties( - ${LIBOPENCC_TARGET} - ${LIBOPENCC_STATIC_TARGET} - PROPERTIES - OUTPUT_NAME - opencc - VERSION - 1.0.0 - SOVERSION - 1 + libopencc + PROPERTIES + LINKER_LANGUAGE + CXX + OUTPUT_NAME + opencc + VERSION + 1.0.0 + SOVERSION + 2 ) -if (ENABLE_GETTEXT) - - add_definitions( - -DENABLE_GETTEXT - ) - - link_directories( - ${GETTEXT_LIBRARIES} - ) - - include_directories( - ${GETTEXT_INCLUDE_DIR} - ) - -endif (ENABLE_GETTEXT) - if (CMAKE_BUILD_TYPE MATCHES Debug) - - add_definitions( - -O0 - -g3 - ) - + add_definitions( + -O0 + -g3 + ) endif (CMAKE_BUILD_TYPE MATCHES Debug) -if (NOT WIN32) install( - TARGETS - ${LIBOPENCC_TARGET} - LIBRARY DESTINATION - ${DIR_LIBRARY} + TARGETS + libopencc + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib/static ) -endif (NOT WIN32) install( - TARGETS - ${LIBOPENCC_STATIC_TARGET} - ARCHIVE DESTINATION - ${DIR_LIBRARY_STATIC} + FILES + ${LIBOPENCC_HEADERS} + DESTINATION + ${DIR_INCLUDE}/opencc ) -install( - FILES - ${LIBOPENCC_HEADERS} - DESTINATION - ${DIR_INCLUDE}/opencc 
+add_executable( + opencc + CommandLine.cpp +) +target_link_libraries( + opencc + libopencc ) -include(symbols.cmake) +add_executable( + opencc_dict + DictConverter.cpp +) +target_link_libraries( + opencc_dict + libopencc +) -add_subdirectory(tools) +install( + TARGETS + opencc + opencc_dict + RUNTIME DESTINATION bin +) diff --git a/src/CmdLineOutput.hpp b/src/CmdLineOutput.hpp new file mode 100644 index 0000000..70c3f41 --- /dev/null +++ b/src/CmdLineOutput.hpp @@ -0,0 +1,47 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "tclap/CmdLine.h" + +class CmdLineOutput : public TCLAP::StdOutput { +public: + virtual void usage(TCLAP::CmdLineInterface& cmd) { + std::cout << std::endl + << cmd.getMessage() << std::endl + << "Author: Carbo Kuo " << std::endl + << "Bug Report: http://github.com/BYVoid/OpenCC/issues" + << std::endl << std::endl + << "Usage: " << std::endl + << std::endl; + + _shortUsage(cmd, std::cout); + std::cout << std::endl; + std::cout << "Options: " << std::endl << std::endl; + _longUsage(cmd, std::cout); + std::cout << std::endl; + } + + virtual void version(TCLAP::CmdLineInterface& cmd) { + std::cout << std::endl + << cmd.getMessage() << std::endl + << "Version: " << cmd.getVersion() << std::endl + << std::endl; + } +}; diff --git a/src/CommandLine.cpp b/src/CommandLine.cpp new file mode 100644 index 0000000..1b4956d --- /dev/null +++ b/src/CommandLine.cpp @@ -0,0 +1,179 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CmdLineOutput.hpp" +#include "Config.hpp" +#include "Converter.hpp" +#include "UTF8Util.hpp" + +using namespace opencc; + +Optional inputFileName = Optional::Null(); +Optional outputFileName = Optional::Null(); +string configFileName; +bool noFlush; +Config config; +ConverterPtr converter; + +FILE* GetOutputStream() { + if (outputFileName.IsNull()) { + return stdout; + } else { + FILE* fp = fopen(outputFileName.Get().c_str(), "w"); + if (!fp) { + throw FileNotWritable(outputFileName.Get()); + } + return fp; + } +} + +void ConvertLineByLine() { + std::istream& inputStream = std::cin; + FILE* fout = GetOutputStream(); + while (!inputStream.eof()) { + string line; + std::getline(inputStream, line); + const string& converted = converter->Convert(line); + fputs(converted.c_str(), fout); + fputs("\n", fout); + if (!noFlush) { + // Flush every line if the output stream is stdout. + fflush(fout); + } + } + fclose(fout); +} + +void Convert() { + const int BUFFER_SIZE = 1024 * 1024; + static bool bufferInitialized = false; + static string buffer; + static char* bufferBegin; + static const char* bufferEnd; + static char* bufferPtr; + static size_t bufferSizeAvailble; + if (!bufferInitialized) { + bufferInitialized = true; + buffer.resize(BUFFER_SIZE + 1); + bufferBegin = const_cast(buffer.c_str()); + bufferEnd = buffer.c_str() + BUFFER_SIZE; + bufferPtr = bufferBegin; + bufferSizeAvailble = BUFFER_SIZE; + } + + FILE* fin = fopen(inputFileName.Get().c_str(), "r"); + if (!fin) { + throw FileNotFound(inputFileName.Get()); + } + FILE* fout = GetOutputStream(); + while (!feof(fin)) { + size_t length = fread(bufferPtr, sizeof(char), bufferSizeAvailble, fin); + bufferPtr[length] = '\0'; + size_t remainingLength = 0; + string remainingTemp; + if (length == bufferSizeAvailble) { + // fread may breaks UTF8 character + // Find the end of last character + char* lastChPtr = bufferBegin; + while (lastChPtr < bufferEnd) { + size_t nextCharLen = 
UTF8Util::NextCharLength(lastChPtr); + if (lastChPtr + nextCharLen > bufferEnd) { + break; + } + lastChPtr += nextCharLen; + } + remainingLength = bufferEnd - lastChPtr; + if (remainingLength > 0) { + remainingTemp = UTF8Util::FromSubstr(lastChPtr, remainingLength); + *lastChPtr = '\0'; + } + } + // Perform conversion + const string& converted = converter->Convert(buffer); + fputs(converted.c_str(), fout); + if (!noFlush) { + // Flush every line if the output stream is stdout. + fflush(fout); + } + // Reset pointer + bufferPtr = bufferBegin + remainingLength; + bufferSizeAvailble = BUFFER_SIZE - remainingLength; + if (remainingLength > 0) { + strncpy(bufferBegin, remainingTemp.c_str(), remainingLength); + } + } + fclose(fout); +} + +int main(int argc, const char* argv[]) { + try { + TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Command Line Tool", + ' ', + VERSION); + CmdLineOutput cmdLineOutput; + cmd.setOutput(&cmdLineOutput); + + TCLAP::ValueArg configArg("c", "config", + "Configuration file", + false /* required */, + "s2t.json" /* default */, + "file" /* type */, + cmd); + TCLAP::ValueArg outputArg("o", "output", + "Write converted text to", + false /* required */, + "" /* default */, + "file" /* type */, + cmd); + TCLAP::ValueArg inputArg("i", "input", + "Read original text from", + false /* required */, + "" /* default */, + "file" /* type */, + cmd); + TCLAP::ValueArg noFlushArg("", "noflush", + "Disable flush for every line", + false /* required */, + false /* default */, + "bool" /* type */, + cmd); + cmd.parse(argc, argv); + configFileName = configArg.getValue(); + noFlush = noFlushArg.getValue(); + if (inputArg.isSet()) { + inputFileName = Optional(inputArg.getValue()); + } + if (outputArg.isSet()) { + outputFileName = Optional(outputArg.getValue()); + noFlush = true; + } + converter = config.NewFromFile(configFileName); + bool lineByLine = inputFileName.IsNull(); + if (lineByLine) { + ConvertLineByLine(); + } else { + Convert(); + } + } catch 
(TCLAP::ArgException& e) { + std::cerr << "error: " << e.error() + << " for arg " << e.argId() << std::endl; + } catch (Exception& e) { + std::cerr << e.what() << std::endl; + } + return 0; +} diff --git a/src/Common.hpp b/src/Common.hpp new file mode 100644 index 0000000..af02c75 --- /dev/null +++ b/src/Common.hpp @@ -0,0 +1,91 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// Microsoft Visual C++ specific +#if defined(_MSC_VER) && (_MSC_VER >= 1020) +#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "Exception.hpp" +#include "Export.hpp" +#include "Optional.hpp" + +using std::list; +using std::string; +using std::vector; + +// Forward decalarations and alias +namespace opencc { +class BinaryDict; +class Config; +class Conversion; +class ConversionChain; +class Converter; +class DartsDict; +class Dict; +class DictEntry; +class DictGroup; +class Lexicon; +class MultiValueDictEntry; +class NoValueDictEntry; +class Segmentation; +class Segments; +class SerializableDict; +class SingleValueDictEntry; +class TextDict; +typedef std::shared_ptr BinaryDictPtr; +typedef std::shared_ptr ConversionPtr; +typedef std::shared_ptr ConversionChainPtr; +typedef std::shared_ptr ConverterPtr; +typedef 
std::shared_ptr DartsDictPtr; +typedef std::shared_ptr DictPtr; +typedef std::shared_ptr DictGroupPtr; +typedef std::shared_ptr LexiconPtr; +typedef std::shared_ptr SegmentationPtr; +typedef std::shared_ptr SegmentsPtr; +typedef std::shared_ptr SerializableDictPtr; +typedef std::shared_ptr TextDictPtr; +} + +#ifndef PKGDATADIR +const string PACKAGE_DATA_DIRECTORY = ""; +#else // ifndef PKGDATADIR +const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/"; +#endif // ifndef PKGDATADIR + +#ifndef VERSION +# define VERSION "1.0.*" +#endif // ifndef VERSION diff --git a/src/Config.cpp b/src/Config.cpp new file mode 100644 index 0000000..1ca7d73 --- /dev/null +++ b/src/Config.cpp @@ -0,0 +1,240 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Config.hpp" +#include "ConversionChain.hpp" +#include "Converter.hpp" +#include "DartsDict.hpp" +#include "DictGroup.hpp" +#include "MaxMatchSegmentation.hpp" +#include "TextDict.hpp" + +#include "document.h" + +#include + +using namespace opencc; + +typedef rapidjson::GenericValue> JSONValue; + +namespace opencc { +class ConfigInternal { +public: + string configDirectory; + std::unordered_map>> dictCache; + + const JSONValue& GetProperty(const JSONValue& doc, const char* name) { + if (!doc.HasMember(name)) { + throw InvalidFormat("Required property not found: " + string(name)); + } + return doc[name]; + } + + const JSONValue& GetObjectProperty(const JSONValue& doc, const char* name) { + const JSONValue& obj = GetProperty(doc, name); + if (!obj.IsObject()) { + throw InvalidFormat("Property must be an object: " + string(name)); + } + return obj; + } + + const JSONValue& GetArrayProperty(const JSONValue& doc, const char* name) { + const JSONValue& obj = GetProperty(doc, name); + if (!obj.IsArray()) { + throw InvalidFormat("Property must be an array: " + string(name)); + } + return obj; + } + + const char* GetStringProperty(const JSONValue& doc, const char* name) { + const JSONValue& obj = GetProperty(doc, name); + if (!obj.IsString()) { + throw InvalidFormat("Property must be a string: " + string(name)); + } + return obj.GetString(); + } + + template + DictPtr LoadDictWithPaths(const string& fileName) { + // Working directory + std::shared_ptr dict; + if (SerializableDict::TryLoadFromFile(fileName, &dict)) { + return dict; + } + // Configuration directory + if ((configDirectory != "") && + SerializableDict::TryLoadFromFile(configDirectory + fileName, + &dict)) { + return dict; + } + // Package data directory + if ((PACKAGE_DATA_DIRECTORY != "") && + SerializableDict::TryLoadFromFile(PACKAGE_DATA_DIRECTORY + fileName, + &dict)) { + return dict; + } + throw FileNotFound(fileName); + } + + DictPtr ParseDict(const JSONValue& doc) { + // Required: type + 
string type = GetStringProperty(doc, "type"); + DictPtr dict; + if (type == "group") { + list dicts; + const JSONValue& docs = GetArrayProperty(doc, "dicts"); + for (rapidjson::SizeType i = 0; i < docs.Size(); i++) { + if (docs[i].IsObject()) { + DictPtr dict = ParseDict(docs[i]); + dicts.push_back(dict); + } else { + throw InvalidFormat("Element of the array must be an object"); + } + } + return DictGroupPtr(new DictGroup(dicts)); + } else { + string fileName = GetStringProperty(doc, "file"); + // Read from cache + DictPtr& cache = dictCache[type][configDirectory][fileName]; + if (cache != nullptr) { + return cache; + } + if (type == "text") { + dict = LoadDictWithPaths(fileName); + } else if (type == "ocd") { + dict = LoadDictWithPaths(fileName); + } else { + throw InvalidFormat("Unknown dictionary type: " + type); + } + // Update Cache + cache = dict; + return dict; + } + } + + SegmentationPtr ParseSegmentation(const JSONValue& doc) { + SegmentationPtr segmentation; + + // Required: type + string type = GetStringProperty(doc, "type"); + if (type == "mmseg") { + // Required: dict + DictPtr dict = ParseDict(GetObjectProperty(doc, "dict")); + segmentation = SegmentationPtr(new MaxMatchSegmentation(dict)); + } else { + throw InvalidFormat("Unknown segmentation type: " + type); + } + return segmentation; + } + + ConversionPtr ParseConversion(const JSONValue& doc) { + // Required: dict + DictPtr dict = ParseDict(GetObjectProperty(doc, "dict")); + ConversionPtr conversion(new Conversion(dict)); + + return conversion; + } + + ConversionChainPtr ParseConversionChain(const JSONValue& docs) { + list conversions; + for (rapidjson::SizeType i = 0; i < docs.Size(); i++) { + const JSONValue& doc = docs[i]; + if (doc.IsObject()) { + ConversionPtr conversion = ParseConversion(doc); + conversions.push_back(conversion); + } else {} + } + ConversionChainPtr chain(new ConversionChain(conversions)); + return chain; + } + + string FindConfigFile(string fileName) { + std::ifstream ifs; 
+ + // Working directory + ifs.open(fileName.c_str()); + if (ifs.is_open()) { + return fileName; + } + // Package data directory + if (PACKAGE_DATA_DIRECTORY != "") { + string prefixedFileName = PACKAGE_DATA_DIRECTORY + fileName; + ifs.open(prefixedFileName.c_str()); + if (ifs.is_open()) { + return prefixedFileName; + } + } + throw FileNotFound(fileName); + } +}; +}; + +Config::Config() : internal(new ConfigInternal()) { +} + +Config::~Config() { + delete (ConfigInternal*)internal; +} + +ConverterPtr Config::NewFromFile(const string& fileName) { + ConfigInternal* impl = (ConfigInternal*)internal; + string prefixedFileName = impl->FindConfigFile(fileName); + std::ifstream ifs(prefixedFileName); + string content(std::istreambuf_iterator(ifs), + (std::istreambuf_iterator())); + +#if defined(_WIN32) || defined(_WIN64) + UTF8Util::ReplaceAll(prefixedFileName, "\\", "/"); +#endif // if defined(_WIN32) || defined(_WIN64) + size_t slashPos = prefixedFileName.rfind("/"); + string configDirectory = ""; + if (slashPos != string::npos) { + configDirectory = prefixedFileName.substr(0, slashPos) + "/"; + } + return NewFromString(content, configDirectory); +} + +ConverterPtr Config::NewFromString(const string& json, const string& configDirectory) { + rapidjson::Document doc; + + doc.ParseInsitu<0>(const_cast(json.c_str())); + if (doc.HasParseError()) { + throw InvalidFormat("Error parsing JSON"); // doc.GetErrorOffset() + } + if (!doc.IsObject()) { + throw InvalidFormat("Root of configuration must be an object"); + } + + // Optional: name + string name; + if (doc.HasMember("name") && doc["name"].IsString()) { + name = doc["name"].GetString(); + } + + ConfigInternal* impl = (ConfigInternal*)internal; + impl->configDirectory = configDirectory; + + // Required: segmentation + SegmentationPtr segmentation = impl->ParseSegmentation( + impl->GetObjectProperty(doc, "segmentation")); + + // Required: conversion_chain + ConversionChainPtr chain = impl->ParseConversionChain( + 
impl->GetArrayProperty(doc, "conversion_chain")); + return ConverterPtr(new Converter(name, segmentation, chain)); +} diff --git a/src/config_reader.h b/src/Config.hpp similarity index 51% rename from src/config_reader.h rename to src/Config.hpp index 46c6416..2595192 100644 --- a/src/config_reader.h +++ b/src/Config.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2013 BYVoid + * Copyright 2010-2014 BYVoid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,28 +16,27 @@ * limitations under the License. */ -#ifndef __OPENCC_CONFIG_H_ -#define __OPENCC_CONFIG_H_ +#pragma once -#include "common.h" -#include "dict_chain.h" +#include "Common.hpp" -typedef enum { - CONFIG_ERROR_VOID, - CONFIG_ERROR_CANNOT_ACCESS_CONFIG_FILE, - CONFIG_ERROR_PARSE, - CONFIG_ERROR_NO_PROPERTY, - CONFIG_ERROR_INVALID_DICT_TYPE, -} config_error; +namespace opencc { +/** +* Configuration loader +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Config { +public: + Config(); -Config* config_open(const char* filename); + virtual ~Config(); -void config_close(Config* config); + ConverterPtr NewFromString(const string& json, + const string& configDirectory); -DictChain* config_get_dict_chain(Config* config); + ConverterPtr NewFromFile(const string& fileName); -config_error config_errno(void); - -void config_perror(const char* spec); - -#endif /* __OPENCC_CONFIG_H_ */ +private: + void* internal; +}; +} diff --git a/src/Conversion.cpp b/src/Conversion.cpp new file mode 100644 index 0000000..089b321 --- /dev/null +++ b/src/Conversion.cpp @@ -0,0 +1,51 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Conversion.hpp" +#include "Dict.hpp" + +using namespace opencc; + +string Conversion::Convert(const char* phrase) const { + std::ostringstream buffer; + for (const char* pstr = phrase; *pstr != '\0';) { + Optional matched = dict->MatchPrefix(pstr); + size_t matchedLength; + if (matched.IsNull()) { + matchedLength = UTF8Util::NextCharLength(pstr); + buffer << UTF8Util::FromSubstr(pstr, matchedLength); + } else { + matchedLength = matched.Get()->KeyLength(); + buffer << matched.Get()->GetDefault(); + } + pstr += matchedLength; + } + return buffer.str(); +} + +string Conversion::Convert(const string& phrase) const { + return Convert(phrase.c_str()); +} + +SegmentsPtr Conversion::Convert(const SegmentsPtr& input) const { + SegmentsPtr output(new Segments); + for (const char* segment : *input) { + output->AddSegment(Convert(segment)); + } + return output; +} diff --git a/src/Conversion.hpp b/src/Conversion.hpp new file mode 100644 index 0000000..8b03808 --- /dev/null +++ b/src/Conversion.hpp @@ -0,0 +1,50 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "Segmentation.hpp" + +namespace opencc { +/** +* Conversion interface +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Conversion { +public: + Conversion(DictPtr _dict) : dict(_dict) { + } + + // Convert single phrase + string Convert(const string& phrase) const; + + // Convert single phrase + string Convert(const char* phrase) const; + + // Convert segmented text + SegmentsPtr Convert(const SegmentsPtr& input) const; + + const DictPtr GetDict() const { + return dict; + } + +private: + const DictPtr dict; +}; +} diff --git a/src/ConversionChain.cpp b/src/ConversionChain.cpp new file mode 100644 index 0000000..302d395 --- /dev/null +++ b/src/ConversionChain.cpp @@ -0,0 +1,34 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConversionChain.hpp" +#include "Segments.hpp" + +using namespace opencc; + +ConversionChain::ConversionChain(const list _conversions) + : conversions(_conversions) { +} + +SegmentsPtr ConversionChain::Convert(const SegmentsPtr& input) const { + SegmentsPtr output = input; + for (auto conversion : conversions) { + output = conversion->Convert(output); + } + return output; +} diff --git a/src/ConversionChain.hpp b/src/ConversionChain.hpp new file mode 100644 index 0000000..a2c8e1c --- /dev/null +++ b/src/ConversionChain.hpp @@ -0,0 +1,43 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "Conversion.hpp" + +namespace opencc { +/** +* Chain of conversions +* Consists of a list of conversions. Converts input in sequence. 
+* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT ConversionChain { +public: + ConversionChain(const list _conversions); + + SegmentsPtr Convert(const SegmentsPtr& input) const; + + const list GetConversions() const { + return conversions; + } + +private: + const list conversions; +}; +} diff --git a/src/Converter.cpp b/src/Converter.cpp new file mode 100644 index 0000000..347b4dc --- /dev/null +++ b/src/Converter.cpp @@ -0,0 +1,35 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConversionChain.hpp" +#include "Converter.hpp" +#include "Segments.hpp" + +using namespace opencc; + +string Converter::Convert(const string& text) const { + const SegmentsPtr& segments = segmentation->Segment(text); + const SegmentsPtr& converted = conversionChain->Convert(segments); + return converted->ToString(); +} + +size_t Converter::Convert(const char* input, char* output) const { + const string& converted = Convert(input); + strcpy(output, converted.c_str()); + return converted.length(); +} diff --git a/src/Converter.hpp b/src/Converter.hpp new file mode 100644 index 0000000..f080c0a --- /dev/null +++ b/src/Converter.hpp @@ -0,0 +1,55 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "Segmentation.hpp" + +namespace opencc { +/** +* Controller of segmentation and conversion +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Converter { +public: + Converter(const string& _name, + SegmentationPtr _segmentation, + ConversionChainPtr _conversionChain) : + name(_name), segmentation(_segmentation), + conversionChain(_conversionChain) { + } + + string Convert(const string& text) const; + + size_t Convert(const char* input, char* output) const; + + const SegmentationPtr GetSegmentation() const { + return segmentation; + } + + const ConversionChainPtr GetConversionChain() const { + return conversionChain; + } + +private: + const string name; + const SegmentationPtr segmentation; + const ConversionChainPtr conversionChain; +}; +} diff --git a/src/DartsDict.cpp b/src/DartsDict.cpp new file mode 100644 index 0000000..b461d21 --- /dev/null +++ b/src/DartsDict.cpp @@ -0,0 +1,168 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryDict.hpp"
+#include "DartsDict.hpp"
+#include "darts.h"
+#include "Lexicon.hpp"
+
+using namespace opencc;
+
+// Magic bytes written at the start of every serialized Darts dictionary.
+static const char* OCDHEADER = "OPENCCDARTS1";
+
+// Private state of DartsDict (only forward-declared in DartsDict.hpp).
+// The destructor frees `buffer` and deletes `doubleArray`, so anything stored
+// here is released together with the owning DartsDict.
+class DartsDict::DartsInternal {
+public:
+  BinaryDictPtr binary;
+  void* buffer;
+  Darts::DoubleArray* doubleArray;
+
+  DartsInternal() : binary(nullptr), buffer(nullptr), doubleArray(nullptr) {
+  }
+
+  ~DartsInternal() {
+    if (buffer != nullptr) {
+      free(buffer);
+    }
+    if (doubleArray != nullptr) {
+      delete doubleArray;
+    }
+  }
+};
+
+// maxLength is zero-initialized so KeyMaxLength() never reads an
+// indeterminate value before a factory function fills it in.
+DartsDict::DartsDict() : maxLength(0), internal(new DartsInternal) {
+}
+
+DartsDict::~DartsDict() {
+  delete internal;
+}
+
+// Returns the key length recorded when the dictionary was built or loaded.
+size_t DartsDict::KeyMaxLength() const {
+  return maxLength;
+}
+
+// Exact lookup of `word`. A result value of -1 is treated as "no match";
+// any other value is used as an index into the lexicon.
+Optional<const DictEntry*> DartsDict::Match(const char* word) const {
+  Darts::DoubleArray& dict = *internal->doubleArray;
+  Darts::DoubleArray::result_pair_type result;
+
+  dict.exactMatchSearch(word, result);
+  if (result.value != -1) {
+    return Optional<const DictEntry*>(
+        lexicon->At(static_cast<size_t>(result.value)));
+  } else {
+    return Optional<const DictEntry*>::Null();
+  }
+}
+
+// Looks up the prefixes of `word` and returns the entry for the last result
+// reported by commonPrefixSearch, which this code takes as the longest match.
+// A fixed stack buffer is tried first; if the reported number of matches does
+// not fit, the search is repeated with a buffer of exactly that size.
+Optional<const DictEntry*> DartsDict::MatchPrefix(const char* word) const {
+  const size_t DEFAULT_NUM_ENTRIES = 64;
+  Darts::DoubleArray& dict = *internal->doubleArray;
+  Darts::DoubleArray::value_type results[DEFAULT_NUM_ENTRIES];
+  Darts::DoubleArray::value_type maxMatchedResult;
+  size_t numMatched =
+      dict.commonPrefixSearch(word, results, DEFAULT_NUM_ENTRIES);
+  if (numMatched == 0) {
+    return Optional<const DictEntry*>::Null();
+  } else if (numMatched < DEFAULT_NUM_ENTRIES) {
+    maxMatchedResult = results[numMatched - 1];
+  } else {
+    // vector instead of new[]/delete[]: released on every exit path.
+    vector<Darts::DoubleArray::value_type> rematchedResults(numMatched);
+    numMatched =
+        dict.commonPrefixSearch(word, &rematchedResults[0], numMatched);
+    maxMatchedResult = rematchedResults[numMatched - 1];
+  }
+  if (maxMatchedResult >= 0) {
+    return Optional<const DictEntry*>(
+        lexicon->At(static_cast<size_t>(maxMatchedResult)));
+  } else {
+    return Optional<const DictEntry*>::Null();
+  }
+}
+
+LexiconPtr DartsDict::GetLexicon() const {
+  return lexicon;
+}
+
+// Reads a dictionary in the layout produced by SerializeToFile: the
+// OCDHEADER bytes, the darts array size as a raw size_t, the darts array
+// itself, then a BinaryDict.
+// Throws InvalidFormat when the header or the sizes do not match.
+DartsDictPtr DartsDict::NewFromFile(FILE* fp) {
+  DartsDictPtr dict(new DartsDict());
+  // Everything allocated below is stored in dict->internal immediately, so
+  // ~DartsInternal releases it if any later step throws. Previously the
+  // buffers and the DoubleArray were leaked on every error path.
+  auto internal = dict->internal;
+  internal->doubleArray = new Darts::DoubleArray();
+
+  const size_t headerLen = strlen(OCDHEADER);
+  vector<char> header(headerLen);
+  size_t bytesRead = fread(&header[0], sizeof(char), headerLen, fp);
+  if (bytesRead != headerLen ||
+      memcmp(&header[0], OCDHEADER, headerLen) != 0) {
+    throw InvalidFormat("Invalid OpenCC dictionary header");
+  }
+
+  size_t dartsSize;
+  bytesRead = fread(&dartsSize, sizeof(size_t), 1, fp);
+  if (bytesRead * sizeof(size_t) != sizeof(size_t)) {
+    throw InvalidFormat("Invalid OpenCC dictionary header (dartsSize)");
+  }
+  internal->buffer = malloc(dartsSize);
+  bytesRead = fread(internal->buffer, 1, dartsSize, fp);
+  if (bytesRead != dartsSize) {
+    throw InvalidFormat("Invalid OpenCC dictionary size of darts mismatch");
+  }
+  // The DoubleArray only borrows the buffer; DartsInternal frees it.
+  internal->doubleArray->set_array(internal->buffer);
+
+  internal->binary = BinaryDict::NewFromFile(fp);
+  dict->lexicon = internal->binary->GetLexicon();
+  dict->maxLength = internal->binary->KeyMaxLength();
+  return dict;
+}
+
+// Builds a Darts double array over the keys of `thatDict`'s lexicon. The new
+// dictionary shares that lexicon instead of copying it.
+DartsDictPtr DartsDict::NewFromDict(const Dict& thatDict) {
+  DartsDictPtr dict(new DartsDict());
+  // Owned by dict->internal from the start so it is released if build()
+  // throws.
+  auto internal = dict->internal;
+  internal->doubleArray = new Darts::DoubleArray();
+
+  vector<const char*> keys;
+  size_t maxLength = 0;
+  const LexiconPtr& lexicon = thatDict.GetLexicon();
+  size_t lexiconCount = lexicon->Length();
+  keys.resize(lexiconCount);
+  for (size_t i = 0; i < lexiconCount; i++) {
+    const DictEntry* entry = lexicon->At(i);
+    keys[i] = entry->Key();
+    maxLength = std::max(entry->KeyLength(), maxLength);
+  }
+  // data() rather than &keys[0]: well-defined even for an empty lexicon.
+  internal->doubleArray->build(lexiconCount, keys.data());
+  dict->lexicon = lexicon;
+  dict->maxLength = maxLength;
+  return dict;
+}
+
+// Writes OCDHEADER, the darts array size (raw size_t, so the layout follows
+// the writer's size_t width), the darts array, and then the lexicon as a
+// BinaryDict.
+void DartsDict::SerializeToFile(FILE* fp) const {
+  Darts::DoubleArray& dict = *internal->doubleArray;
+
+  fwrite(OCDHEADER, sizeof(char), strlen(OCDHEADER), fp);
+
+  size_t dartsSize = dict.total_size();
+  fwrite(&dartsSize, sizeof(size_t), 1, fp);
+  fwrite(dict.array(), sizeof(char), dartsSize, fp);
+
+  auto internal = this->internal;
+  internal->binary.reset(new BinaryDict(lexicon));
+  internal->binary->SerializeToFile(fp);
+}
diff --git a/src/DartsDict.hpp b/src/DartsDict.hpp
new file mode 100644
index 0000000..bb4394e
--- /dev/null
+++ b/src/DartsDict.hpp
@@ -0,0 +1,60 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "Common.hpp"
+#include "SerializableDict.hpp"
+
+namespace opencc {
+/**
+* Darts dictionary
+* @ingroup opencc_cpp_api
+*/
+class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict {
+public:
+  virtual ~DartsDict();
+
+  virtual size_t KeyMaxLength() const;
+
+  virtual Optional<const DictEntry*> Match(const char* word) const;
+
+  virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
+
+  virtual LexiconPtr GetLexicon() const;
+
+  virtual void SerializeToFile(FILE* fp) const;
+
+  /**
+   * Constructs a DartsDict from another dictionary.
+   */
+  static DartsDictPtr NewFromDict(const Dict& thatDict);
+
+  static DartsDictPtr NewFromFile(FILE* fp);
+
+private:
+  DartsDict();
+
+  size_t maxLength;
+  LexiconPtr lexicon;
+
+  class DartsInternal;
+  DartsInternal* internal;
+};
+
+}
diff --git a/src/Dict.cpp b/src/Dict.cpp
new file mode 100644
index 0000000..755b67c
--- /dev/null
+++ b/src/Dict.cpp
@@ -0,0 +1,52 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "Dict.hpp" + +using namespace opencc; + +Optional Dict::MatchPrefix(const char* word) const { + string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength()); + const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length(); + for (long len = static_cast(wordTrunc.length()); len > 0;) { + wordTrunc.resize(static_cast(len)); + wordTruncPtr = wordTrunc.c_str() + len; + const Optional& result = Match(wordTrunc.c_str()); + if (!result.IsNull()) { + return result; + } + len -= UTF8Util::PrevCharLength(wordTruncPtr); + } + return Optional::Null(); +} + +vector Dict::MatchAllPrefixes(const char* word) const { + vector matchedLengths; + string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength()); + const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length(); + for (long len = static_cast(wordTrunc.length()); len > 0; + len -= UTF8Util::PrevCharLength(wordTruncPtr)) { + wordTrunc.resize(static_cast(len)); + wordTruncPtr = wordTrunc.c_str() + len; + const Optional& result = Match(wordTrunc.c_str()); + if (!result.IsNull()) { + matchedLengths.push_back(result.Get()); + } + } + return matchedLengths; +} diff --git a/src/Dict.hpp b/src/Dict.hpp new file mode 100644 index 0000000..f923f37 --- /dev/null +++ b/src/Dict.hpp @@ -0,0 +1,81 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "Common.hpp" +#include "DictEntry.hpp" + +namespace opencc { +/** +* Abstract class of dictionary +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Dict { +public: + /** + * Matches a word exactly and returns the DictEntry or Optional::Null(). + */ + virtual Optional Match(const char* word) const = 0; + + /** + * Matches a word exactly and returns the DictEntry or Optional::Null(). + */ + Optional Match(const string& word) const { + return Match(word.c_str()); + } + + /** + * Matches the longest matched prefix of a word. + * For example given a dictionary having "a", "an", "b", "ba", "ban", "bana", + * the longest prefix of "banana" matched is "bana". + */ + virtual Optional MatchPrefix(const char* word) const; + + /** + * Matches the longest matched prefix of a word. + */ + Optional MatchPrefix(const string& word) const { + return MatchPrefix(word.c_str()); + } + + /** + * Returns all matched prefixes of a word, sorted by the length (desc). + * For example given a dictionary having "a", "an", "b", "ba", "ban", "bana", + * all the matched prefixes of "banana" are "bana", "ban", "ba", "b". + */ + virtual vector MatchAllPrefixes(const char* word) const; + + /** + * Returns all matched prefixes of a word, sorted by the length (desc). + */ + vector MatchAllPrefixes(const string& word) const { + return MatchAllPrefixes(word.c_str()); + } + + /** + * Returns the length of the longest key in the dictionary. + */ + virtual size_t KeyMaxLength() const = 0; + + /** + * Returns all entries in the dictionary. + */ + virtual LexiconPtr GetLexicon() const = 0; +}; +} diff --git a/src/DictConverter.cpp b/src/DictConverter.cpp new file mode 100644 index 0000000..3b09568 --- /dev/null +++ b/src/DictConverter.cpp @@ -0,0 +1,105 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CmdLineOutput.hpp"
+#include "DartsDict.hpp"
+#include "TextDict.hpp"
+
+using namespace opencc;
+
+// Loads the dictionary stored in `inputFileName`. `format` selects the
+// reader: "text" or "ocd". Any other value prints a message to stderr and
+// terminates the process with exit status 2.
+DictPtr LoadDictionary(const string& format,
+                       const string& inputFileName) {
+  if (format == "text") {
+    return SerializableDict::NewFromFile<TextDict>(inputFileName);
+  } else if (format == "ocd") {
+    return SerializableDict::NewFromFile<DartsDict>(inputFileName);
+  } else {
+    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+    exit(2);
+  }
+  return nullptr;
+}
+
+// Re-creates `dict` in the representation named by `format` ("text" or
+// "ocd"). Any other value prints a message to stderr and terminates the
+// process with exit status 2.
+SerializableDictPtr ConvertDictionary(const string& format,
+                                      const DictPtr dict) {
+  if (format == "text") {
+    return TextDict::NewFromDict(*dict);
+  } else if (format == "ocd") {
+    return DartsDict::NewFromDict(*dict);
+  } else {
+    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+    exit(2);
+  }
+  return nullptr;
+}
+
+// Loads `inputFileName` as `formatFrom`, converts it to `formatTo` and
+// serializes the result to `outputFileName`.
+// Arguments are taken by const reference; callers are unaffected.
+void ConvertDictionary(const string& inputFileName,
+                       const string& outputFileName,
+                       const string& formatFrom,
+                       const string& formatTo) {
+  DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
+  SerializableDictPtr dictTo = ConvertDictionary(formatTo, dictFrom);
+  dictTo->SerializeToFile(outputFileName);
+}
+
+// Command line entry point of the dictionary tool.
+// Returns 0 on success and 1 when argument parsing or the conversion throws.
+// It used to return 0 in both cases, so callers could not detect a failed
+// conversion.
+int main(int argc, const char* argv[]) {
+  try {
+    TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Dictionary Tool",
+                       ' ',
+                       VERSION);
+    CmdLineOutput cmdLineOutput;
+    cmd.setOutput(&cmdLineOutput);
+
+    vector<string> dictFormats{"text", "ocd"};
+    TCLAP::ValuesConstraint<string> allowedVals(dictFormats);
+
+    TCLAP::ValueArg<string> toArg("t", "to",
+                                  "Output format",
+                                  true /* required */,
+                                  "" /* default */,
+                                  &allowedVals /* type */,
+                                  cmd);
+    TCLAP::ValueArg<string> fromArg("f", "from",
+                                    "Input format",
+                                    true /* required */,
+                                    "" /* default */,
+                                    &allowedVals /* type */,
+                                    cmd);
+    TCLAP::ValueArg<string> outputArg("o", "output",
+                                      "Path to output dictionary",
+                                      true /* required */,
+                                      "" /* default */,
+                                      "file" /* type */,
+                                      cmd);
+    TCLAP::ValueArg<string> inputArg("i", "input",
+                                     "Path to input dictionary",
+                                     true /* required */,
+                                     "" /* default */,
+                                     "file" /* type */,
+                                     cmd);
+    cmd.parse(argc, argv);
+    ConvertDictionary(inputArg.getValue(), outputArg.getValue(),
+                      fromArg.getValue(), toArg.getValue());
+  } catch (const TCLAP::ArgException& e) {
+    std::cerr << "error: " << e.error()
+              << " for arg " << e.argId() << std::endl;
+    return 1;
+  } catch (const Exception& e) {
+    std::cerr << e.what() << std::endl;
+    return 1;
+  }
+  return 0;
+}
diff --git a/src/DictEntry.cpp b/src/DictEntry.cpp
new file mode 100644
index 0000000..0c80805
--- /dev/null
+++ b/src/DictEntry.cpp
@@ -0,0 +1,38 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "DictEntry.hpp" + +using namespace opencc; + + +string MultiValueDictEntry::ToString() const { + // TODO escape space + size_t i = 0; + size_t length = Values().size(); + std::ostringstream buffer; + buffer << Key() << '\t'; + for (const char* value : Values()) { + buffer << value; + if (i < length - 1) { + buffer << ' '; + } + i++; + } + return buffer.str(); +} \ No newline at end of file diff --git a/src/DictEntry.hpp b/src/DictEntry.hpp new file mode 100644 index 0000000..00f2778 --- /dev/null +++ b/src/DictEntry.hpp @@ -0,0 +1,241 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#pragma once
+
+#include "Common.hpp"
+#include "UTF8Util.hpp"
+#include "Segments.hpp"
+
+namespace opencc {
+/**
+* Key-values pair entry
+* @ingroup opencc_cpp_api
+*/
+class OPENCC_EXPORT DictEntry {
+public:
+  virtual ~DictEntry() {
+  }
+
+  virtual const char* Key() const = 0;
+
+  virtual vector<const char*> Values() const = 0;
+
+  // The string used when a single replacement is wanted. Every concrete
+  // entry in this header returns its first value, or the key itself when it
+  // has no value.
+  virtual const char* GetDefault() const = 0;
+
+  virtual size_t NumValues() const = 0;
+
+  virtual string ToString() const = 0;
+
+  // Length of the key in bytes (strlen).
+  size_t KeyLength() const {
+    return strlen(Key());
+  }
+
+  // Entries are ordered and compared by key only (strcmp); values are
+  // ignored.
+  bool operator<(const DictEntry& that) const {
+    return strcmp(Key(), that.Key()) < 0;
+  }
+
+  bool operator==(const DictEntry& that) const {
+    return strcmp(Key(), that.Key()) == 0;
+  }
+
+  static bool PtrLessThan(const DictEntry* a, const DictEntry* b) {
+    return *a < *b;
+  }
+};
+
+// Entry that has a key but no values; GetDefault() returns the key.
+class OPENCC_EXPORT NoValueDictEntry : public DictEntry {
+public:
+  NoValueDictEntry(const string& _key) : key(_key) {
+  }
+
+  virtual ~NoValueDictEntry() {
+  }
+
+  virtual const char* Key() const {
+    return key.c_str();
+  }
+
+  virtual vector<const char*> Values() const {
+    return vector<const char*>();
+  }
+
+  virtual const char* GetDefault() const {
+    return Key();
+  }
+
+  virtual size_t NumValues() const {
+    return 0;
+  }
+
+  virtual string ToString() const {
+    return key;
+  }
+
+private:
+  string key;
+};
+
+// Base class of entries holding exactly one value, exposed through Value().
+class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
+public:
+  virtual const char* Value() const = 0;
+
+  virtual vector<const char*> Values() const {
+    return vector<const char*>{Value()};
+  }
+
+  virtual const char* GetDefault() const {
+    return Value();
+  }
+
+  virtual size_t NumValues() const {
+    return 1;
+  }
+
+  virtual string ToString() const {
+    return string(Key()) + "\t" + Value();
+  }
+};
+
+// Single-value entry that owns copies of its key and value.
+class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry {
+public:
+  StrSingleValueDictEntry(const string& _key, const string& _value)
+      : key(_key), value(_value) {
+  }
+
+  virtual ~StrSingleValueDictEntry() {
+  }
+
+  virtual const char* Key() const {
+    return key.c_str();
+  }
+
+  virtual const char* Value() const {
+    return value.c_str();
+  }
+
+private:
+  string key;
+  string value;
+};
+
+// Base class of entries holding any number of values. Note that such an
+// entry may report NumValues() == 1 without being a SingleValueDictEntry.
+class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
+public:
+  virtual const char* GetDefault() const {
+    if (NumValues() > 0) {
+      return Values().at(0);
+    } else {
+      return Key();
+    }
+  }
+
+  virtual string ToString() const;
+};
+
+// Multi-value entry that owns copies of its key and values.
+class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
+public:
+  StrMultiValueDictEntry(const string& _key, const vector<string>& _values)
+      : key(_key), values(_values) {
+  }
+
+  StrMultiValueDictEntry(const string& _key,
+                         const vector<const char*>& _values)
+      : key(_key) {
+    values.reserve(_values.size());
+    for (const char* str : _values) {
+      values.push_back(str);
+    }
+  }
+
+  virtual ~StrMultiValueDictEntry() {
+  }
+
+  virtual const char* Key() const {
+    return key.c_str();
+  }
+
+  size_t NumValues() const {
+    return values.size();
+  }
+
+  // The returned pointers refer to strings owned by this entry.
+  vector<const char*> Values() const {
+    vector<const char*> pointers;
+    pointers.reserve(values.size());
+    for (const string& value : values) {
+      pointers.push_back(value.c_str());
+    }
+    return pointers;
+  }
+
+private:
+  string key;
+  vector<string> values;
+};
+
+// Multi-value entry that stores the key and value pointers it is given
+// without copying the strings they point to.
+class OPENCC_EXPORT PtrDictEntry : public MultiValueDictEntry {
+public:
+  PtrDictEntry(const char* _key, const vector<const char*>& _values)
+      : key(_key), values(_values) {
+  }
+
+  virtual ~PtrDictEntry() {
+  }
+
+  virtual const char* Key() const {
+    return key;
+  }
+
+  size_t NumValues() const {
+    return values.size();
+  }
+
+  vector<const char*> Values() const {
+    return values;
+  }
+
+private:
+  const char* key;
+  vector<const char*> values;
+};
+
+// Creates heap-allocated entries; the caller owns the returned pointer.
+class OPENCC_EXPORT DictEntryFactory {
+public:
+  static DictEntry* New(const string& key) {
+    return new NoValueDictEntry(key);
+  }
+
+  static DictEntry* New(const string& key, const string& value) {
+    return new StrSingleValueDictEntry(key, value);
+  }
+
+  static DictEntry* New(const string& key, const vector<string>& values) {
+    return new StrMultiValueDictEntry(key, values);
+  }
+
+  // Deep-copies `entry` into an entry that owns its strings.
+  // Only the virtual DictEntry interface is used. The previous version
+  // static_cast the pointer to SingleValueDictEntry whenever NumValues() was
+  // 1, which is undefined behaviour for a PtrDictEntry or
+  // StrMultiValueDictEntry that happens to hold one value.
+  static DictEntry* New(const DictEntry* entry) {
+    const size_t numValues = entry->NumValues();
+    if (numValues == 0) {
+      return new NoValueDictEntry(entry->Key());
+    } else if (numValues == 1) {
+      return new StrSingleValueDictEntry(entry->Key(),
+                                         entry->Values().front());
+    } else {
+      return new StrMultiValueDictEntry(entry->Key(), entry->Values());
+    }
+  }
+};
+
+}
diff --git a/src/DictGroup.cpp b/src/DictGroup.cpp
new file mode 100644
index 0000000..e45cf23
--- /dev/null
+++ b/src/DictGroup.cpp
@@ -0,0 +1,92 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DictGroup.hpp"
+#include "Lexicon.hpp"
+#include "TextDict.hpp"
+
+using namespace opencc;
+
+namespace {
+// Longest key length reported by any dictionary in `dicts`; 0 when the list
+// is empty.
+size_t MaxKeyLengthOf(const list<DictPtr>& dicts) {
+  size_t longest = 0;
+  for (const auto& dict : dicts) {
+    longest = (std::max)(longest, dict->KeyMaxLength());
+  }
+  return longest;
+}
+}
+
+// keyMaxLength is derived from the member dictionaries. It used to be
+// hard-coded to 0, so KeyMaxLength() returned 0 for every group.
+// It is computed from the constructor argument, not the `dicts` member,
+// because keyMaxLength is declared (and therefore initialized) first.
+DictGroup::DictGroup(const list<DictPtr>& _dicts) :
+    keyMaxLength(MaxKeyLengthOf(_dicts)), dicts(_dicts) {
+}
+
+DictGroup::~DictGroup() {
+}
+
+size_t DictGroup::KeyMaxLength() const {
+  return keyMaxLength;
+}
+
+// Returns the exact match from the first dictionary (in list order) that
+// contains `word`.
+Optional<const DictEntry*> DictGroup::Match(const char* word) const {
+  for (const auto& dict : dicts) {
+    const Optional<const DictEntry*>& prefix = dict->Match(word);
+    if (!prefix.IsNull()) {
+      return prefix;
+    }
+  }
+  return Optional<const DictEntry*>::Null();
+}
+
+// Returns the prefix match from the first dictionary (in list order) that
+// has one. Later dictionaries are not consulted, even if they would match a
+// longer prefix.
+Optional<const DictEntry*> DictGroup::MatchPrefix(const char* word) const {
+  for (const auto& dict : dicts) {
+    const Optional<const DictEntry*>& prefix = dict->MatchPrefix(word);
+    if (!prefix.IsNull()) {
+      return prefix;
+    }
+  }
+  return Optional<const DictEntry*>::Null();
+}
+
+// Collects the prefix matches of all dictionaries, keeping for each key
+// length the entry of the earliest dictionary that produced it. The result
+// is ordered by key length, longest first.
+vector<const DictEntry*> DictGroup::MatchAllPrefixes(const char* word) const {
+  std::map<size_t, const DictEntry*> matched;
+  // Match all prefixes from all dictionaries
+  for (const auto& dict : dicts) {
+    const vector<const DictEntry*>& entries = dict->MatchAllPrefixes(word);
+    for (const auto& entry : entries) {
+      size_t len = entry->KeyLength();
+      // If the current length has already result, skip
+      if (matched.find(len) == matched.end()) {
+        matched[len] = entry;
+      }
+    }
+  }
+  vector<const DictEntry*> matchedEntries;
+  matchedEntries.reserve(matched.size());
+  // The map is ordered by ascending length; walk it backwards.
+  for (auto i = matched.rbegin(); i != matched.rend(); i++) {
+    matchedEntries.push_back(i->second);
+  }
+  return matchedEntries;
+}
+
+// Returns a new, sorted lexicon holding copies of the entries of every
+// member dictionary. Duplicate keys are not removed.
+LexiconPtr DictGroup::GetLexicon() const {
+  LexiconPtr allLexicon(new Lexicon);
+  for (const auto& dict : dicts) {
+    const auto& lexicon = dict->GetLexicon();
+    for (const auto& item : *lexicon) {
+      allLexicon->Add(DictEntryFactory::New(item));
+    }
+  }
+  allLexicon->Sort();
+  // Fixme deduplicate
+  return allLexicon;
+}
+
+// Wraps a TextDict copy of `dict` in a group with that single member.
+DictGroupPtr DictGroup::NewFromDict(const Dict& dict) {
+  TextDictPtr newDict = TextDict::NewFromDict(dict);
+  return DictGroupPtr(new DictGroup(list<DictPtr>{newDict}));
+}
diff --git a/src/DictGroup.hpp b/src/DictGroup.hpp
new file mode 100644
index 0000000..9941527
--- /dev/null
+++ b/src/DictGroup.hpp
@@ -0,0 +1,55 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "Common.hpp"
+#include "Dict.hpp"
+
+namespace opencc {
+/**
+* Group of dictionaries
+* @ingroup opencc_cpp_api
+*/
+class OPENCC_EXPORT DictGroup : public Dict {
+public:
+  DictGroup(const list<DictPtr>& dicts);
+
+  static DictGroupPtr NewFromDict(const Dict& dict);
+
+  virtual ~DictGroup();
+
+  virtual size_t KeyMaxLength() const;
+
+  virtual Optional<const DictEntry*> Match(const char* word) const;
+
+  virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
+
+  virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
+
+  virtual LexiconPtr GetLexicon() const;
+
+  const list<DictPtr> GetDicts() const {
+    return dicts;
+  }
+
+private:
+  const size_t keyMaxLength;
+  const list<DictPtr> dicts;
+};
+}
diff --git a/src/Exception.hpp b/src/Exception.hpp
new file mode 100644
index 0000000..53b3d3a
--- /dev/null
+++ b/src/Exception.hpp
@@ -0,0 +1,91 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "Export.hpp" + +#ifdef _MSC_VER + +// Until Visual Studio 2013 (12.0), C++ 11 "noexcept" qualifier is not supported +# define noexcept +#endif // ifdef _MSC_VER + +namespace opencc { +class OPENCC_EXPORT Exception : public std::exception { +public: + Exception() { + } + + virtual ~Exception() throw() { + } + + Exception(const std::string& _message) : message(_message) { + } + + virtual const char* what() const noexcept { + return message.c_str(); + } + +protected: + std::string message; +}; + +class OPENCC_EXPORT FileNotFound : public Exception { +public: + FileNotFound(const std::string& fileName) : + Exception(fileName + " not found or not accessible.") { + } +}; + +class OPENCC_EXPORT FileNotWritable : public Exception { +public: + FileNotWritable(const std::string& fileName) : + Exception(fileName + " not writable.") { + } +}; + +class OPENCC_EXPORT InvalidFormat : public Exception { +public: + InvalidFormat(const std::string& message) : + Exception("Invalid format: " + message) { + } +}; + +class OPENCC_EXPORT InvalidTextDictionary : public InvalidFormat { +public: + InvalidTextDictionary(const std::string& _message, size_t lineNum) : + InvalidFormat("") { + std::ostringstream buffer; + buffer << "Invalid text dictionary at line " << lineNum << ": " + << _message; + message = buffer.str(); + } +}; + +class OPENCC_EXPORT InvalidUTF8 : public Exception { +public: + InvalidUTF8(const std::string& _message) : + Exception("Invalid UTF8: " + _message) { + } +}; +} diff --git a/src/Export.hpp b/src/Export.hpp new file mode 100644 index 0000000..35eee55 --- /dev/null +++ b/src/Export.hpp @@ -0,0 +1,40 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if defined(Opencc_BUILT_AS_STATIC) || !defined(_WIN32) +# define OPENCC_EXPORT +# define OPENCC_NO_EXPORT +#else // if defined(Opencc_BUILT_AS_STATIC) || !defined(_WIN32) +# ifndef OPENCC_EXPORT +# ifdef libopencc_EXPORTS + +/* We are building this library */ +# define OPENCC_EXPORT __declspec(dllexport) +# else // ifdef libopencc_EXPORTS + +/* We are using this library */ +# define OPENCC_EXPORT __declspec(dllimport) +# endif // ifdef libopencc_EXPORTS +# endif // ifndef OPENCC_EXPORT + +# ifndef OPENCC_NO_EXPORT +# define OPENCC_NO_EXPORT +# endif // ifndef OPENCC_NO_EXPORT +#endif // if defined(Opencc_BUILT_AS_STATIC) || !defined(_WIN32) diff --git a/src/Lexicon.hpp b/src/Lexicon.hpp new file mode 100644 index 0000000..dd13cc2 --- /dev/null +++ b/src/Lexicon.hpp @@ -0,0 +1,67 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "Common.hpp" +#include "DictEntry.hpp" + +namespace opencc { +/** +* Storage of all entries +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Lexicon { +public: + Lexicon() { + } + + ~Lexicon() { + for (DictEntry* entry : entries) { + delete entry; + } + } + + void Add(DictEntry* entry) { + entries.push_back(entry); + } + + void Sort() { + std::sort(entries.begin(), entries.end(), DictEntry::PtrLessThan); + } + + const DictEntry* At(size_t index) const { + return entries.at(index); + } + + size_t Length() const { + return entries.size(); + } + + vector::const_iterator begin() const { + return entries.begin(); + } + + vector::const_iterator end() const { + return entries.end(); + } + +private: + vector entries; +}; +} diff --git a/src/MaxMatchSegmentation.cpp b/src/MaxMatchSegmentation.cpp new file mode 100644 index 0000000..bd8734d --- /dev/null +++ b/src/MaxMatchSegmentation.cpp @@ -0,0 +1,49 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MaxMatchSegmentation.hpp" + +using namespace opencc; + +SegmentsPtr MaxMatchSegmentation::Segment(const string& text) const { + SegmentsPtr segments(new Segments); + const char* segStart = text.c_str(); + size_t segLength = 0; + auto clearBuffer = [&segments, &segStart, &segLength]() { + if (segLength > 0) { + segments->AddSegment(UTF8Util::FromSubstr(segStart, segLength)); + segLength = 0; + } + }; + for (const char* pstr = text.c_str(); *pstr != '\0';) { + const Optional& matched = dict->MatchPrefix(pstr); + size_t matchedLength; + if (matched.IsNull()) { + matchedLength = UTF8Util::NextCharLength(pstr); + segLength += matchedLength; + } else { + clearBuffer(); + matchedLength = matched.Get()->KeyLength(); + segments->AddSegment(matched.Get()->Key()); + segStart = pstr + matchedLength; + } + pstr += matchedLength; + } + clearBuffer(); + return segments; +} diff --git a/src/MaxMatchSegmentation.hpp b/src/MaxMatchSegmentation.hpp new file mode 100644 index 0000000..75a049e --- /dev/null +++ b/src/MaxMatchSegmentation.hpp @@ -0,0 +1,47 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "Common.hpp" +#include "DictGroup.hpp" +#include "Segmentation.hpp" + +namespace opencc { +/** +* Implementation of maximal match segmentation +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT MaxMatchSegmentation : public Segmentation { +public: + MaxMatchSegmentation(const DictPtr _dict) : dict(_dict) { + } + + virtual ~MaxMatchSegmentation() { + } + + virtual SegmentsPtr Segment(const string& text) const; + + const DictPtr GetDict() const { + return dict; + } + +private: + const DictPtr dict; +}; +} diff --git a/src/Optional.hpp b/src/Optional.hpp new file mode 100644 index 0000000..33ac287 --- /dev/null +++ b/src/Optional.hpp @@ -0,0 +1,95 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace opencc { +/** +* A class that wraps type T into a nullable type. +* @ingroup opencc_cpp_api +*/ +template +class Optional { +public: + /** + * The constructor of Optional. + */ + Optional(T actual) : isNull(false), data(actual) { + } + + /** + * Returns true if the instance is null. + */ + bool IsNull() const { + return isNull; + } + + /** + * Returns the containing data of the instance. + */ + const T& Get() const { + return data; + } + + /** + * Constructs a null instance. 
+ */ + static Optional Null() { + return Optional(); + } + +private: + Optional() : isNull(true) { + } + + bool isNull; + T data; +}; + +/** +* Specialization of Optional for pointers. +* +* Reduce a bool. +*/ +template +class Optional { +private: + Optional() : data(nullptr) { + } + + typedef T* TPtr; + TPtr data; + +public: + Optional(TPtr actual) : data(actual) { + } + + bool IsNull() const { + return data == nullptr; + } + + const TPtr& Get() const { + return data; + } + + static Optional Null() { + return Optional(); + } +}; + +} \ No newline at end of file diff --git a/src/Segmentation.cpp b/src/Segmentation.cpp new file mode 100644 index 0000000..fb31c8a --- /dev/null +++ b/src/Segmentation.cpp @@ -0,0 +1,17 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/src/dict_chain.h b/src/Segmentation.hpp similarity index 61% rename from src/dict_chain.h rename to src/Segmentation.hpp index 0f16c78..84e7f21 100644 --- a/src/dict_chain.h +++ b/src/Segmentation.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2013 BYVoid + * Copyright 2010-2014 BYVoid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,17 +16,17 @@ * limitations under the License. 
*/ -#ifndef __DICTIONARY_SET_H_ -#define __DICTIONARY_SET_H_ +#pragma once -#include "common.h" +#include "Common.hpp" -DictChain* dict_chain_new(Config* config); - -void dict_chain_delete(DictChain* dict_chain); - -DictGroup* dict_chain_add_group(DictChain* dict_chain); - -DictGroup* dict_chain_get_group(DictChain* dict_chain, size_t index); - -#endif /* __DICTIONARY_SET_H_ */ +namespace opencc { +/** +* Abstract segmentation +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Segmentation { +public: + virtual SegmentsPtr Segment(const string& text) const = 0; +}; +} diff --git a/src/Segments.hpp b/src/Segments.hpp new file mode 100644 index 0000000..bf97d36 --- /dev/null +++ b/src/Segments.hpp @@ -0,0 +1,124 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "Common.hpp" + +namespace opencc { +/** +* Segmented text +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT Segments { +public: + Segments() { + } + + Segments(std::initializer_list initList) { + for (const string& item : initList) { + AddSegment(item); + } + } + + Segments(std::initializer_list initList) { + for (const string& item : initList) { + AddSegment(item); + } + } + + void AddSegment(const char* unmanagedString) { + indexes.push_back(std::make_pair(unmanaged.size(), false)); + unmanaged.push_back(unmanagedString); + } + + void AddSegment(const string& str) { + indexes.push_back(std::make_pair(managed.size(), true)); + managed.push_back(str); + } + + class iterator : public std::iterator { + public: + iterator(const Segments* const _segments, + size_t _cursor) + : segments(_segments), cursor(_cursor) { + } + + iterator& operator++() { + cursor++; + return *this; + } + + bool operator==(const iterator& that) const { + return cursor == that.cursor && segments == that.segments; + } + + bool operator!=(const iterator& that) const { + return !this->operator==(that); + } + + const char* operator*() const { + return segments->At(cursor); + } + + private: + const Segments* const segments; + size_t cursor; + }; + + const char* At(size_t cursor) const { + const auto& index = indexes[cursor]; + if (index.second) { + return managed[index.first].c_str(); + } else { + return unmanaged[index.first]; + } + } + + size_t Length() const { + return indexes.size(); + } + + iterator begin() const { + return iterator(this, 0); + } + + iterator end() const { + return iterator(this, indexes.size()); + } + + string ToString() const { + // TODO implement a nested structure to reduce concatenation, + // like a purely functional differential list + std::ostringstream buffer; + for (const char* segment : *this) { + buffer << segment; + } + return buffer.str(); + } + +private: + Segments(const Segments&) { + } + + vector unmanaged; + vector managed; + 
// index, managed + vector> indexes; +}; +} diff --git a/src/SerializableDict.hpp b/src/SerializableDict.hpp new file mode 100644 index 0000000..7b14cb4 --- /dev/null +++ b/src/SerializableDict.hpp @@ -0,0 +1,69 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Dict.hpp" + +namespace opencc { +/** +* Serializable dictionary interface +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT SerializableDict { +public: + /** + * Serializes the dictionary and writes in to a file. + */ + virtual void SerializeToFile(FILE* fp) const = 0; + + /** + * Serializes the dictionary and writes in to a file. 
+ */ + virtual void SerializeToFile(const string& fileName) const { + FILE* fp = fopen(fileName.c_str(), "wb"); + if (fp == NULL) { + throw FileNotWritable(fileName); + } + SerializeToFile(fp); + fclose(fp); + } + + template + static bool TryLoadFromFile(const string& fileName, + std::shared_ptr* dict) { + FILE* fp = fopen(fileName.c_str(), "rb"); + if (fp == NULL) { + return false; + } + std::shared_ptr loadedDict = DICT::NewFromFile(fp); + fclose(fp); + *dict = loadedDict; + return true; + } + + template + static std::shared_ptr NewFromFile(const string& fileName) { + std::shared_ptr dict; + if (!TryLoadFromFile(fileName, &dict)) { + throw FileNotFound(fileName); + } + return dict; + } +}; +} diff --git a/src/SimpleConverter.cpp b/src/SimpleConverter.cpp new file mode 100644 index 0000000..ccb72b2 --- /dev/null +++ b/src/SimpleConverter.cpp @@ -0,0 +1,146 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Config.hpp" +#include "Converter.hpp" +#include "opencc.h" +#include "UTF8Util.hpp" + +using namespace opencc; + +struct InternalData { + const ConverterPtr converter; + + InternalData(const ConverterPtr& _converter) : converter(_converter) { + } +}; + +SimpleConverter::SimpleConverter(const std::string& configFileName) { + try { + Config config; + internalData = new InternalData(config.NewFromFile(configFileName)); + } catch (Exception& ex) { + throw std::runtime_error(ex.what()); + } +} + +SimpleConverter::~SimpleConverter() { + delete (InternalData*)internalData; +} + +std::string SimpleConverter::Convert(const std::string& input) const { + try { + const InternalData* data = (InternalData*)internalData; + return data->converter->Convert(input); + } catch (Exception& ex) { + throw std::runtime_error(ex.what()); + } +} + +std::string SimpleConverter::Convert(const char* input) const { + return Convert(string(input)); +} + +std::string SimpleConverter::Convert(const char* input, size_t length) const { + if (length == static_cast(-1)) { + return Convert(string(input)); + } else { + return Convert(UTF8Util::FromSubstr(input, length)); + } +} + +size_t SimpleConverter::Convert(const char* input, char* output) const { + try { + const InternalData* data = (InternalData*)internalData; + return data->converter->Convert(input, output); + } catch (Exception& ex) { + throw std::runtime_error(ex.what()); + } +} + +size_t SimpleConverter::Convert(const char* input, + size_t length, + char* output) const { + if (length == static_cast(-1)) { + return Convert(input, output); + } else { + string trimmed = UTF8Util::FromSubstr(input, length); + return Convert(trimmed.c_str(), output); + } +} + +static string cError; + +opencc_t opencc_open(const char* configFileName) { + try { + if (configFileName == nullptr) { + configFileName = OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD; + } + SimpleConverter* instance = new SimpleConverter(configFileName); + return instance; + } catch 
(std::runtime_error& ex) { + cError = ex.what(); + return reinterpret_cast(-1); + } +} + +int opencc_close(opencc_t opencc) { + try { + SimpleConverter* instance = reinterpret_cast(opencc); + delete instance; + return 0; + } catch (std::exception& ex) { + cError = ex.what(); + return 1; + } +} + +size_t opencc_convert_utf8_to_buffer(opencc_t opencc, + const char* input, + size_t length, + char* output) { + try { + SimpleConverter* instance = reinterpret_cast(opencc); + return instance->Convert(input, length, output); + } catch (std::runtime_error& ex) { + cError = ex.what(); + return static_cast(-1); + } +} + +char* opencc_convert_utf8(opencc_t opencc, const char* input, size_t length) { + try { + SimpleConverter* instance = reinterpret_cast(opencc); + std::string converted = instance->Convert(input, length); + char* output = new char[converted.length() + 1]; + strncpy(output, converted.c_str(), converted.length()); + output[converted.length()] = '\0'; + return output; + } catch (std::runtime_error& ex) { + cError = ex.what(); + return nullptr; + } +} + +void opencc_convert_utf8_free(char* str) { + delete[] str; +} + +const char* opencc_error(void) { + return cError.c_str(); +} diff --git a/src/TextDict.cpp b/src/TextDict.cpp new file mode 100644 index 0000000..c6d53f0 --- /dev/null +++ b/src/TextDict.cpp @@ -0,0 +1,116 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Lexicon.hpp" +#include "TextDict.hpp" + +using namespace opencc; + +static size_t GetKeyMaxLength(const LexiconPtr& lexicon) { + size_t maxLength = 0; + for (const auto& entry : *lexicon) { + size_t keyLength = entry->KeyLength(); + maxLength = std::max(keyLength, maxLength); + } + return maxLength; +} + +static DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { + size_t length; + const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); + if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + throw InvalidTextDictionary("Tabular not found " + string(buff), lineNum); + } + length = static_cast(pbuff - buff); + string key = UTF8Util::FromSubstr(buff, length); + vector values; + while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + buff = pbuff = UTF8Util::NextChar(pbuff); + pbuff = UTF8Util::FindNextInline(buff, ' '); + length = static_cast(pbuff - buff); + const string& value = UTF8Util::FromSubstr(buff, length); + values.push_back(value); + } + if (values.size() == 0) { + throw InvalidTextDictionary("No value in an item", lineNum); + } else if (values.size() == 1) { + return DictEntryFactory::New(key, values.at(0)); + } else { + return DictEntryFactory::New(key, values); + } +} + +static LexiconPtr ParseLexiconFromFile(FILE* fp) { + const int ENTRY_BUFF_SIZE = 4096; + char buff[ENTRY_BUFF_SIZE]; + LexiconPtr lexicon(new Lexicon); + UTF8Util::SkipUtf8Bom(fp); + size_t lineNum = 1; + while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { + lexicon->Add(ParseKeyValues(buff, lineNum)); + lineNum++; + } + return lexicon; +} + +TextDict::TextDict(const LexiconPtr& _lexicon) + : maxLength(GetKeyMaxLength(_lexicon)), lexicon(_lexicon) { +} + +TextDict::~TextDict() { +} + +TextDictPtr TextDict::NewFromSortedFile(FILE* fp) { + const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + return TextDictPtr(new TextDict(lexicon)); +} + +TextDictPtr TextDict::NewFromFile(FILE* fp) { + const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + lexicon->Sort(); + return 
TextDictPtr(new TextDict(lexicon)); +} + +TextDictPtr TextDict::NewFromDict(const Dict& dict) { + return TextDictPtr(new TextDict(dict.GetLexicon())); +} + +size_t TextDict::KeyMaxLength() const { + return maxLength; +} + +Optional TextDict::Match(const char* word) const { + NoValueDictEntry entry(word); + const auto& found = std::lower_bound(lexicon->begin(), lexicon->end(), + &entry, DictEntry::PtrLessThan); + if ((found != lexicon->end()) && (strcmp((*found)->Key(), entry.Key()) == 0)) { + return Optional(*found); + } else { + return Optional::Null(); + } +} + +LexiconPtr TextDict::GetLexicon() const { + return lexicon; +} + +void TextDict::SerializeToFile(FILE* fp) const { + for (const auto& entry : *lexicon) { + fprintf(fp, "%s\n", entry->ToString().c_str()); + } +} diff --git a/src/TextDict.hpp b/src/TextDict.hpp new file mode 100644 index 0000000..5000b27 --- /dev/null +++ b/src/TextDict.hpp @@ -0,0 +1,60 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "SerializableDict.hpp" + +namespace opencc { +/** +* Text dictionary +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT TextDict : public Dict, public SerializableDict { +public: + /** + * Constructor of TextDict. + * _lexicon must be sorted. 
+ */ + TextDict(const LexiconPtr& _lexicon); + + virtual ~TextDict(); + + virtual size_t KeyMaxLength() const; + + virtual Optional Match(const char* word) const; + + virtual LexiconPtr GetLexicon() const; + + virtual void SerializeToFile(FILE* fp) const; + + /** + * Constructs a TextDict from another dictionary. + */ + static TextDictPtr NewFromDict(const Dict& dict); + + static TextDictPtr NewFromFile(FILE* fp); + + static TextDictPtr NewFromSortedFile(FILE* fp); + +private: + const size_t maxLength; + const LexiconPtr lexicon; +}; +} diff --git a/src/UTF8Util.cpp b/src/UTF8Util.cpp new file mode 100644 index 0000000..42d0cf2 --- /dev/null +++ b/src/UTF8Util.cpp @@ -0,0 +1,46 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "UTF8Util.hpp" + +using namespace opencc; + +void UTF8Util::SkipUtf8Bom(FILE* fp) { + /* UTF-8 BOM is EF BB BF */ + if (fp == NULL) { + return; + } + /* If we are not at beginning of file, return */ + if (ftell(fp) != 0) { + return; + } + + /* Try to read first 3 bytes */ + int bom[3]; + int n; + for (n = 0; n <= 2 && (bom[n] = getc(fp)) != EOF; n++) {} + /* If we can only read <3 bytes, push them back */ + /* Or if first 3 bytes is not BOM, push them back */ + if ((n < 3) || (bom[0] != 0xEF) || (bom[1] != 0xBB) || (bom[2] != 0xBF)) { + for (n--; n >= 0; n--) { + ungetc(bom[n], fp); + } + } + + /* Otherwise, BOM is already skipped */ +} diff --git a/src/UTF8Util.hpp b/src/UTF8Util.hpp new file mode 100644 index 0000000..bf21afb --- /dev/null +++ b/src/UTF8Util.hpp @@ -0,0 +1,201 @@ +/* + * Open Chinese Convert + * + * Copyright 2013 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" + +namespace opencc { +/** +* UTF8 string utilities +* @ingroup opencc_cpp_api +*/ +class OPENCC_EXPORT UTF8Util { +public: + /** + * Detect UTF8 BOM and skip it. + */ + static void SkipUtf8Bom(FILE* fp); + + /** + * Returns the length in byte for the next UTF8 character. + * On error returns 0. 
+ */ + static size_t NextCharLengthNoException(const char* str) { + char ch = *str; + if ((ch & 0x80) == 0x00) { + return 1; + } else if ((ch & 0xE0) == 0xC0) { + return 2; + } else if ((ch & 0xF0) == 0xE0) { + return 3; + } else if ((ch & 0xF8) == 0xF0) { + return 4; + } else if ((ch & 0xFC) == 0xF8) { + return 5; + } else if ((ch & 0xFE) == 0xFC) { + return 6; + } + return 0; + } + + /** + * Returns the length in byte for the next UTF8 character. + */ + static size_t NextCharLength(const char* str) { + size_t length = NextCharLengthNoException(str); + if (length == 0) { + throw InvalidUTF8(str); + } + return length; + } + + /** + * Returns the length in byte for the previous UTF8 character. + */ + static size_t PrevCharLength(const char* str) { + for (size_t i = 1; i <= 6; i++) { + str--; + size_t length = NextCharLengthNoException(str); + if (length == i) { + return length; + } + } + throw InvalidUTF8(str); + } + + /** + * Returns the char* pointer over the next UTF8 character. + */ + static const char* NextChar(const char* str) { + return str + NextCharLength(str); + } + + /** + * Move the char* pointer before the previous UTF8 character. + */ + static const char* PrevChar(const char* str) { + return str - PrevCharLength(str); + } + + /** + * Finds a character in the same line. + * @param str The text to be searched in. + * @param ch The character to find. + * @return The pointer that points to the found chacter in str or EOL/EOF. + */ + static const char* FindNextInline(const char* str, const char ch) { + while (!IsLineEndingOrFileEnding(*str) && *str != ch) { + str = NextChar(str); + } + return str; + } + + /** + * Returns ture if the character is a line ending or end of file. + */ + static bool IsLineEndingOrFileEnding(const char ch) { + return ch == '\0' || ch == '\n' || ch == '\r'; + } + + /** + * Copies a substring with given length to a new std::string. 
+ */ + static string FromSubstr(const char* str, size_t length) { + string newStr; + newStr.resize(length); + strncpy(const_cast(newStr.c_str()), str, length); + return newStr; + } + + /** + * Returns true if the given string is longer or as long as the given length. + */ + static bool NotShorterThan(const char* str, size_t length) { + while (length > 0) { + if (*str == '\0') { + return false; + } + length--; + str++; + } + return true; + } + + /** + * Truncates a string with a maximal length. + * No UTF8 character will be broken. + */ + static string TruncateUTF8(const char* str, size_t maxLength) { + string wordTrunc; + if (NotShorterThan(str, maxLength)) { + size_t len = 0; + const char* pStr = str; + while (len < maxLength) { + size_t nextLen = NextCharLength(pStr); + pStr += nextLen; + len += nextLen; + } + wordTrunc = FromSubstr(str, len); + } else { + wordTrunc = str; + } + return wordTrunc; + } + + /** + * Replaces all patterns in a string in place. + */ + static void ReplaceAll(string& str, const char* from, const char* to) { + string::size_type pos = 0; + string::size_type fromLen = strlen(from); + string::size_type toLen = strlen(to); + while ((pos = str.find(from, pos)) != string::npos) { + str.replace(pos, fromLen, to); + pos += toLen; + } + } + + /** + * Joins a string vector in to a string with a separator. + */ + static string Join(const vector& strings, const string& separator) { + std::ostringstream buffer; + bool first = true; + for (const auto& str : strings) { + if (!first) { + buffer << separator; + } + buffer << str; + first = false; + } + return buffer.str(); + } + + /** + * Joins a string vector in to a string. 
+ */ + static string Join(const vector& strings) { + std::ostringstream buffer; + for (const auto& str : strings) { + buffer << str; + } + return buffer.str(); + } +}; +} diff --git a/src/common.h b/src/common.h deleted file mode 100644 index 9193d0e..0000000 --- a/src/common.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __COMMON_H_ -#define __COMMON_H_ - -#include -#include -#include -#include -#include - -#include "opencc_types.h" - -#define INFINITY_INT ((~0U) >> 1) - -#ifdef ENABLE_GETTEXT -# include -# include -# define _(STRING) dgettext(PACKAGE_NAME, STRING) -#else // ENABLE_GETTEXT -# define _(STRING) STRING -#endif // ENABLE_GETTEXT - -#ifndef PKGDATADIR -#define PKGDATADIR "" -#endif - -struct SConfig; -struct SConverter; -struct SDict; -struct SDictGroup; -struct SDictChain; -struct SDictMeta; - -typedef struct SConfig Config; -typedef struct SConverter Converter; -typedef struct SDict Dict; -typedef struct SDictGroup DictGroup; -typedef struct SDictChain DictChain; -typedef struct SDictMeta DictMeta; - -struct SDict { - opencc_dictionary_type type; - Dict* dict; -}; - -#define DICTIONARY_MAX_COUNT 128 -struct SDictGroup { - DictChain* dict_chain; - size_t count; - Dict* dicts[DICTIONARY_MAX_COUNT]; -}; - -#define DICTIONARY_GROUP_MAX_COUNT 128 -struct SDictChain { - Config* config; - size_t count; - DictGroup* 
groups[DICTIONARY_GROUP_MAX_COUNT]; -}; - -struct SDictMeta { - opencc_dictionary_type dict_type; - char* file_name; - size_t index; - size_t stamp; -}; - -struct SConfig { - char* title; - char* description; - DictChain* dict_chain; - char* file_path; - DictMeta dicts[DICTIONARY_MAX_COUNT]; - size_t dicts_count; - size_t stamp; -}; - -struct SConverter { - opencc_conversion_mode conversion_mode; - DictChain* dict_chain; - DictGroup* current_dict_group; - void* data; -}; - -#endif // __COMMON_H_ diff --git a/src/config_reader.c b/src/config_reader.c deleted file mode 100644 index 7678892..0000000 --- a/src/config_reader.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "config_reader.h" -#include "dict_group.h" -#include "dict_chain.h" - -#define LINE_BUFFER_SIZE 8192 -#define CONFIG_DICT_TYPE_OCD "OCD" -#define CONFIG_DICT_TYPE_TEXT "TEXT" - -static config_error errnum = CONFIG_ERROR_VOID; - -static int qsort_dictionary_buffer_cmp(const void* a, const void* b) { - if (((DictMeta*)a)->index < ((DictMeta*)b)->index) { - return -1; - } - if (((DictMeta*)a)->index > ((DictMeta*)b)->index) { - return 1; - } - return ((DictMeta*)a)->stamp < ((DictMeta*)b)->stamp ? 
-1 : 1; -} - -static int load_dictionary(Config* config) { - if (config->dicts_count == 0) { - return 0; - } - // Sort dictionaries - qsort(config->dicts, - config->dicts_count, - sizeof(config->dicts[0]), - qsort_dictionary_buffer_cmp); - DictGroup* group = dict_chain_add_group(config->dict_chain); - size_t last_index = 0; - size_t i; - for (i = 0; i < config->dicts_count; i++) { - if (config->dicts[i].index > last_index) { - last_index = config->dicts[i].index; - group = dict_chain_add_group(config->dict_chain); - } - dict_group_load(group, - config->dicts[i].file_name, - config->dicts[i].dict_type); - } - return 0; -} - -static int parse_add_dict(Config* config, size_t index, const char* dictstr) { - const char* pstr = dictstr; - while (*pstr != '\0' && *pstr != ' ') { - pstr++; - } - opencc_dictionary_type dict_type; - if (strncmp(dictstr, CONFIG_DICT_TYPE_OCD, - sizeof(CONFIG_DICT_TYPE_OCD) - 1) == 0) { - dict_type = OPENCC_DICTIONARY_TYPE_DATRIE; - } else if (strncmp(dictstr, CONFIG_DICT_TYPE_TEXT, - sizeof(CONFIG_DICT_TYPE_OCD) - 1) == 0) { - dict_type = OPENCC_DICTIONARY_TYPE_TEXT; - } else { - errnum = CONFIG_ERROR_INVALID_DICT_TYPE; - return -1; - } - while (*pstr != '\0' && (*pstr == ' ' || *pstr == '\t')) { - pstr++; - } - size_t i = config->dicts_count++; - config->dicts[i].dict_type = dict_type; - config->dicts[i].file_name = mstrcpy(pstr); - config->dicts[i].index = index; - config->dicts[i].stamp = config->stamp++; - return 0; -} - -static int parse_property(Config* config, const char* key, const char* value) { - if (strncmp(key, "dict", 4) == 0) { - int index = 0; - sscanf(key + 4, "%d", &index); - return parse_add_dict(config, index, value); - } else if (strcmp(key, "title") == 0) { - free(config->title); - config->title = mstrcpy(value); - return 0; - } else if (strcmp(key, "description") == 0) { - free(config->description); - config->description = mstrcpy(value); - return 0; - } - errnum = CONFIG_ERROR_NO_PROPERTY; - return -1; -} - -static int 
parse_line(const char* line, char** key, char** value) { - const char* line_begin = line; - while (*line != '\0' && (*line != ' ' && *line != '\t' && *line != '=')) { - line++; - } - size_t key_len = line - line_begin; - while (*line != '\0' && *line != '=') { - line++; - } - if (*line == '\0') { - return -1; - } - assert(*line == '='); - *key = mstrncpy(line_begin, key_len); - line++; - while (*line != '\0' && (*line == ' ' || *line == '\t')) { - line++; - } - if (*line == '\0') { - free(*key); - return -1; - } - *value = mstrcpy(line); - return 0; -} - -static char* parse_trim(char* str) { - for (; *str != '\0' && (*str == ' ' || *str == '\t'); str++) {} - register char* prs = str; - for (; *prs != '\0' && *prs != '\n' && *prs != '\r'; prs++) {} - for (prs--; prs > str && (*prs == ' ' || *prs == '\t'); prs--) {} - *(++prs) = '\0'; - return str; -} - -static int parse(Config* config, const char* filename) { - char* path = try_open_file(filename); - if (path == NULL) { - errnum = CONFIG_ERROR_CANNOT_ACCESS_CONFIG_FILE; - return -1; - } - config->file_path = get_file_path(path); - FILE* fp = fopen(path, "r"); - assert(fp != NULL); - free(path); - skip_utf8_bom(fp); - static char buff[LINE_BUFFER_SIZE]; - while (fgets(buff, LINE_BUFFER_SIZE, fp) != NULL) { - char* trimed_buff = parse_trim(buff); - if ((*trimed_buff == ';') || (*trimed_buff == '#') || - (*trimed_buff == '\0')) { - /* Comment Line or empty line */ - continue; - } - char* key = NULL, * value = NULL; - if (parse_line(trimed_buff, &key, &value) == -1) { - free(key); - free(value); - fclose(fp); - errnum = CONFIG_ERROR_PARSE; - return -1; - } - if (parse_property(config, key, value) == -1) { - free(key); - free(value); - fclose(fp); - return -1; - } - free(key); - free(value); - } - fclose(fp); - return 0; -} - -DictChain* config_get_dict_chain(Config* config) { - if (config->dict_chain != NULL) { - dict_chain_delete(config->dict_chain); - } - config->dict_chain = dict_chain_new(config); - 
load_dictionary(config); - return config->dict_chain; -} - -config_error config_errno(void) { - return errnum; -} - -void config_perror(const char* spec) { - perr(spec); - perr("\n"); - switch (errnum) { - case CONFIG_ERROR_VOID: - break; - case CONFIG_ERROR_CANNOT_ACCESS_CONFIG_FILE: - perror(_("Can not access configuration file")); - break; - case CONFIG_ERROR_PARSE: - perr(_("Configuration file parse error")); - break; - case CONFIG_ERROR_NO_PROPERTY: - perr(_("Invalid property")); - break; - case CONFIG_ERROR_INVALID_DICT_TYPE: - perr(_("Invalid dictionary type")); - break; - default: - perr(_("Unknown")); - } -} - -Config* config_open(const char* filename) { - Config* config = (Config*)malloc(sizeof(Config)); - config->title = NULL; - config->description = NULL; - config->dicts_count = 0; - config->stamp = 0; - config->dict_chain = NULL; - config->file_path = NULL; - if (parse(config, filename) == -1) { - config_close((Config*)config); - return (Config*)-1; - } - return (Config*)config; -} - -void config_close(Config* config) { - size_t i; - for (i = 0; i < config->dicts_count; i++) { - free(config->dicts[i].file_name); - } - free(config->title); - free(config->description); - free(config->file_path); - free(config); -} diff --git a/src/converter.c b/src/converter.c deleted file mode 100644 index 51e0593..0000000 --- a/src/converter.c +++ /dev/null @@ -1,607 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "common.h" -#include "converter.h" -#include "dict_group.h" -#include "dict_chain.h" -#include "encoding.h" - -#define DELIMITER ' ' -#define SEGMENT_MAXIMUM_LENGTH 0 -#define SEGMENT_SHORTEST_PATH 1 -#define SEGMENT_METHOD SEGMENT_SHORTEST_PATH - -#if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH -# define OPENCC_SP_SEG_DEFAULT_BUFFER_SIZE 1024 -typedef struct { - int initialized; - size_t buffer_size; - size_t* match_length; - size_t* min_len; - size_t* parent; - size_t* path; -} SpsegData; -#endif - -static converter_error errnum = CONVERTER_ERROR_VOID; - -#if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH -static void sp_seg_buffer_free(SpsegData* ossb) { - free(ossb->match_length); - free(ossb->min_len); - free(ossb->parent); - free(ossb->path); -} - -static void sp_seg_set_buffer_size(SpsegData* ossb, size_t buffer_size) { - if (ossb->initialized == 1) { - sp_seg_buffer_free(ossb); - } - ossb->buffer_size = buffer_size; - ossb->match_length = (size_t*)malloc((buffer_size + 1) * sizeof(size_t)); - ossb->min_len = (size_t*)malloc(buffer_size * sizeof(size_t)); - ossb->parent = (size_t*)malloc(buffer_size * sizeof(size_t)); - ossb->path = (size_t*)malloc(buffer_size * sizeof(size_t)); - ossb->initialized = 1; -} - -static size_t sp_seg(Converter* converter, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left, - size_t length) { - /* 最短路徑分詞 */ - /* 對長度爲1時特殊優化 */ - if (length == 1) { - const ucs4_t* const* match_rs = dict_group_match_longest( - converter->current_dict_group, - *inbuf, - 1, - NULL); - size_t match_len = 1; - if (converter->conversion_mode == OPENCC_CONVERSION_FAST) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - const ucs4_t* result = match_rs[0]; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) > *outbuf_left) { - errnum = 
CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == - OPENCC_CONVERSION_LIST_CANDIDATES) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - size_t i; - for (i = 0; match_rs[i] != NULL; i++) { - const ucs4_t* result = match_rs[i]; - int show_delimiter = match_rs[i + 1] != NULL ? 1 : 0; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) + show_delimiter > *outbuf_left) { - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - if (show_delimiter) { - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == OPENCC_CONVERSION_SEGMENT_ONLY) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (match_len + 1 > *outbuf_left) { - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - size_t i; - for (i = 0; i < match_len; i++) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } - } - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } else { - debug_should_not_be_here(); - } - /* 必須保證有一個字符空間 */ - return match_len; - } - - /* 設置緩衝區空間 */ - SpsegData* ossb = converter->data; - size_t buffer_size_need = length + 1; - if ((ossb->initialized == 0) || (ossb->buffer_size < buffer_size_need)) { - sp_seg_set_buffer_size(ossb, buffer_size_need); - } - size_t i, j; - for (i = 0; i <= length; i++) { - ossb->min_len[i] = INFINITY_INT; - } - ossb->min_len[0] = ossb->parent[0] = 0; - for (i = 0; i < length; i++) { - /* 獲取所有匹配長度 */ - size_t match_count = dict_group_get_all_match_lengths( - 
converter->current_dict_group, - (*inbuf) + i, - ossb->match_length - ); - if (ossb->match_length[0] != 1) { - ossb->match_length[match_count++] = 1; - } - /* 動態規劃求最短分割路徑 */ - for (j = 0; j < match_count; j++) { - size_t k = ossb->match_length[j]; - ossb->match_length[j] = 0; - if ((k > 1) && (ossb->min_len[i] + 1 <= ossb->min_len[i + k])) { - ossb->min_len[i + k] = ossb->min_len[i] + 1; - ossb->parent[i + k] = i; - } else if ((k == 1) && - (ossb->min_len[i] + 1 < ossb->min_len[i + k])) { - ossb->min_len[i + k] = ossb->min_len[i] + 1; - ossb->parent[i + k] = i; - } - } - } - /* 取得最短分割路徑 */ - for (i = length, j = ossb->min_len[length]; i != 0; i = ossb->parent[i]) { - ossb->path[--j] = i; - } - size_t inbuf_left_start = *inbuf_left; - size_t begin, end; - /* 根據最短分割路徑轉換 */ - for (i = begin = 0; i < ossb->min_len[length]; i++) { - end = ossb->path[i]; - size_t match_len; - const ucs4_t* const* match_rs = dict_group_match_longest( - converter->current_dict_group, - *inbuf, - end - begin, - &match_len - ); - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - if (converter->conversion_mode == OPENCC_CONVERSION_FAST) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - const ucs4_t* result = match_rs[0]; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == - OPENCC_CONVERSION_LIST_CANDIDATES) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - size_t i; - for (i = 0; match_rs[i] != NULL; i++) { - const ucs4_t* result = match_rs[i]; - int 
show_delimiter = match_rs[i + 1] != NULL ? 1 : 0; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) + show_delimiter > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - if (show_delimiter) { - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == OPENCC_CONVERSION_SEGMENT_ONLY) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (match_len + 1 > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - size_t i; - for (i = 0; i < match_len; i++) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } - } - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } else { - debug_should_not_be_here(); - } - } - begin = end; - } - return inbuf_left_start - *inbuf_left; -} - -static size_t segment(Converter* converter, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left) { - /* 歧義分割最短路徑分詞 */ - size_t i, start, bound; - const ucs4_t* inbuf_start = *inbuf; - size_t inbuf_left_start = *inbuf_left; - size_t sp_seg_length; - bound = 0; - for (i = start = 0; inbuf_start[i] && *inbuf_left > 0 && *outbuf_left > 0; - i++) { - if ((i != 0) && (i == bound)) { - /* 對歧義部分進行最短路徑分詞 */ - sp_seg_length = sp_seg(converter, - inbuf, - inbuf_left, - outbuf, - outbuf_left, - bound - start); - - if (sp_seg_length == (size_t)-1) { - return (size_t)-1; - } - if (sp_seg_length == 0) { - if (inbuf_left_start - *inbuf_left > 0) { - return inbuf_left_start - *inbuf_left; - } - /* 空間不足 */ - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - start = i; - } - size_t match_len; - 
dict_group_match_longest( - converter->current_dict_group, - inbuf_start + i, - 0, - &match_len - ); - if (match_len == 0) { - match_len = 1; - } - if (i + match_len > bound) { - bound = i + match_len; - } - } - if ((*inbuf_left > 0) && (*outbuf_left > 0)) { - sp_seg_length = sp_seg(converter, - inbuf, - inbuf_left, - outbuf, - outbuf_left, - bound - start); - if (sp_seg_length == (size_t)-1) { - return (size_t)-1; - } - if (sp_seg_length == 0) { - if (inbuf_left_start - *inbuf_left > 0) { - return inbuf_left_start - *inbuf_left; - } - /* 空間不足 */ - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - } - if (converter->conversion_mode == OPENCC_CONVERSION_SEGMENT_ONLY) { - (*outbuf)--; - (*outbuf_left)++; - } - return inbuf_left_start - *inbuf_left; -} - -#endif /* if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH */ - -#if SEGMENT_METHOD == SEGMENT_MAXIMUM_LENGTH -static size_t segment(Converter* converter, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left) { - /* 正向最大分詞 */ - size_t inbuf_left_start = *inbuf_left; - for (; **inbuf && *inbuf_left > 0 && *outbuf_left > 0;) { - size_t match_len; - const ucs4_t* const* match_rs = dict_group_match_longest( - converter->current_dict_group, - *inbuf, - *inbuf_left, - &match_len - ); - if (converter->conversion_mode == OPENCC_CONVERSION_FAST) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - const ucs4_t* result = match_rs[0]; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == - OPENCC_CONVERSION_LIST_CANDIDATES) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - 
(*inbuf)++, (*inbuf_left)--; - } else { - size_t i; - for (i = 0; match_rs[i] != NULL; i++) { - const ucs4_t* result = match_rs[i]; - int show_delimiter = match_rs[i + 1] != NULL ? 1 : 0; - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (ucs4len(result) + show_delimiter > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - for (; *result; result++) { - **outbuf = *result; - (*outbuf)++, (*outbuf_left)--; - } - if (show_delimiter) { - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } - } - *inbuf += match_len; - *inbuf_left -= match_len; - } - } else if (converter->conversion_mode == OPENCC_CONVERSION_SEGMENT_ONLY) { - if (match_rs == NULL) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } else { - /* 輸出緩衝區剩餘空間小於分詞長度 */ - if (match_len + 1 > *outbuf_left) { - if (inbuf_left_start - *inbuf_left > 0) { - break; - } - errnum = CONVERTER_ERROR_OUTBUF; - return (size_t)-1; - } - size_t i; - for (i = 0; i < match_len; i++) { - **outbuf = **inbuf; - (*outbuf)++, (*outbuf_left)--; - (*inbuf)++, (*inbuf_left)--; - } - } - **outbuf = DELIMITER; - (*outbuf)++, (*outbuf_left)--; - } else { - debug_should_not_be_here(); - } - } - if (converter->conversion_mode == OPENCC_CONVERSION_SEGMENT_ONLY) { - (*outbuf)--; - (*outbuf_left)++; - } - return inbuf_left_start - *inbuf_left; -} - -#endif /* if SEGMENT_METHOD == SEGMENT_MAXIMUM_LENGTH */ - -size_t converter_convert(Converter* converter, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left) { - if (converter->dict_chain == NULL) { - errnum = CONVERTER_ERROR_NODICT; - return (size_t)-1; - } - if (converter->dict_chain->count == 1) { - /* 只有一個辭典,直接輸出 */ - return segment(converter, - inbuf, - inbuf_left, - outbuf, - outbuf_left); - } - // 啓用辭典轉換鏈 - size_t inbuf_size = *inbuf_left; - size_t outbuf_size = *outbuf_left; - size_t retval = (size_t)-1; - size_t cinbuf_left, coutbuf_left; - size_t 
coutbuf_delta = 0; - size_t i, cur; - ucs4_t* tmpbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * outbuf_size); - ucs4_t* orig_outbuf = *outbuf; - ucs4_t* cinbuf, * coutbuf; - cinbuf_left = inbuf_size; - coutbuf_left = outbuf_size; - cinbuf = *inbuf; - coutbuf = tmpbuf; - for (i = cur = 0; i < converter->dict_chain->count; ++i, cur = 1 - cur) { - if (i > 0) { - cinbuf_left = coutbuf_delta; - coutbuf_left = outbuf_size; - - if (cur == 1) { - cinbuf = tmpbuf; - coutbuf = orig_outbuf; - } else { - cinbuf = orig_outbuf; - coutbuf = tmpbuf; - } - } - converter->current_dict_group = dict_chain_get_group( - converter->dict_chain, - i); - size_t ret = segment(converter, - &cinbuf, - &cinbuf_left, - &coutbuf, - &coutbuf_left); - if (ret == (size_t)-1) { - free(tmpbuf); - return (size_t)-1; - } - coutbuf_delta = outbuf_size - coutbuf_left; - if (i == 0) { - retval = ret; - *inbuf = cinbuf; - *inbuf_left = cinbuf_left; - } - } - if (cur == 1) { - // 結果在緩衝區 - memcpy(*outbuf, tmpbuf, coutbuf_delta * sizeof(ucs4_t)); - } - *outbuf += coutbuf_delta; - *outbuf_left = coutbuf_left; - free(tmpbuf); - return retval; -} - -void converter_assign_dictionary(Converter* converter, DictChain* dict_chain) { - converter->dict_chain = dict_chain; - if (converter->dict_chain->count > 0) { - converter->current_dict_group = dict_chain_get_group( - converter->dict_chain, - 0); - } -} - -Converter* converter_open(void) { - Converter* converter = (Converter*)malloc(sizeof(Converter)); - converter->dict_chain = NULL; - converter->current_dict_group = NULL; -#if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH - converter->data = (SpsegData*)malloc(sizeof(SpsegData)); - SpsegData* spseg_buffer = converter->data; - spseg_buffer->initialized = 0; - spseg_buffer->match_length = NULL; - spseg_buffer->min_len = NULL; - spseg_buffer->parent = NULL; - spseg_buffer->path = NULL; - sp_seg_set_buffer_size(spseg_buffer, OPENCC_SP_SEG_DEFAULT_BUFFER_SIZE); -#endif /* if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH */ - return 
converter; -} - -void converter_close(Converter* converter) { -#if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH - sp_seg_buffer_free(converter->data); - free((SpsegData *)converter->data); -#endif /* if SEGMENT_METHOD == SEGMENT_SHORTEST_PATH */ - free(converter); -} - -void converter_set_conversion_mode(Converter* converter, - opencc_conversion_mode conversion_mode) { - converter->conversion_mode = conversion_mode; -} - -converter_error converter_errno(void) { - return errnum; -} - -void converter_perror(const char* spec) { - perr(spec); - perr("\n"); - switch (errnum) { - case CONVERTER_ERROR_VOID: - break; - case CONVERTER_ERROR_NODICT: - perr(_("No dictionary loaded")); - break; - case CONVERTER_ERROR_OUTBUF: - perr(_("Output buffer not enough for one segment")); - break; - default: - perr(_("Unknown")); - } -} diff --git a/src/converter.h b/src/converter.h deleted file mode 100644 index b21f4da..0000000 --- a/src/converter.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CONVERTER_H_ -#define __CONVERTER_H_ - -#include "common.h" -#include "dict_chain.h" - -typedef enum { - CONVERTER_ERROR_VOID, - CONVERTER_ERROR_NODICT, - CONVERTER_ERROR_OUTBUF, -} converter_error; - -void converter_assign_dictionary(Converter* converter, DictChain* DictChain); - -Converter* converter_open(void); - -void converter_close(Converter* converter); - -size_t converter_convert(Converter* converter, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left); - -void converter_set_conversion_mode(Converter* converter, - opencc_conversion_mode conversion_mode); - -converter_error converter_errno(void); - -void converter_perror(const char* spec); - -#endif /* __CONVERTER_H_ */ diff --git a/src/dict.c b/src/dict.c deleted file mode 100644 index 2217481..0000000 --- a/src/dict.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "dict.h" -#include "dictionary/datrie.h" -#include "dictionary/text.h" - -Dict* dict_new(const char* filename, opencc_dictionary_type type) { - Dict* dictionary = (Dict*)malloc(sizeof(Dict)); - dictionary->type = type; - switch (type) { - case OPENCC_DICTIONARY_TYPE_TEXT: - dictionary->dict = dict_text_new(filename); - break; - case OPENCC_DICTIONARY_TYPE_DATRIE: - dictionary->dict = dict_datrie_new(filename); - break; - default: - free(dictionary); - dictionary = (Dict*)-1; /* TODO:辭典格式不支持 */ - } - return dictionary; -} - -void dict_delete(Dict* dict) { - switch (dict->type) { - case OPENCC_DICTIONARY_TYPE_TEXT: - dict_text_delete(dict->dict); - break; - case OPENCC_DICTIONARY_TYPE_DATRIE: - dict_datrie_delete(dict->dict); - break; - default: - debug_should_not_be_here(); - } - free(dict); -} - -const ucs4_t* const* dict_match_longest(Dict* dict, - const ucs4_t* word, - size_t maxlen, - size_t* match_length) { - switch (dict->type) { - case OPENCC_DICTIONARY_TYPE_TEXT: - return dict_text_match_longest(dict->dict, - word, - maxlen, - match_length); - break; - case OPENCC_DICTIONARY_TYPE_DATRIE: - return dict_datrie_match_longest(dict->dict, - word, - maxlen, - match_length); - break; - default: - debug_should_not_be_here(); - } - return (const ucs4_t* const*)-1; -} - -size_t dict_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length) { - switch (dict->type) { - case OPENCC_DICTIONARY_TYPE_TEXT: - return dict_text_get_all_match_lengths(dict->dict, - word, - match_length); - break; - case OPENCC_DICTIONARY_TYPE_DATRIE: - return dict_datrie_get_all_match_lengths(dict->dict, - word, - match_length); - break; - default: - debug_should_not_be_here(); - } - return (size_t)-1; -} diff --git a/src/dict.h b/src/dict.h deleted file mode 100644 index 19c7232..0000000 --- a/src/dict.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCC_DICTIONARY_ABSTRACT_H_ -#define __OPENCC_DICTIONARY_ABSTRACT_H_ - -#include "common.h" -#include "utils.h" - -Dict* dict_new(const char* filename, opencc_dictionary_type type); - -void dict_delete(Dict* dict); - -const ucs4_t* const* dict_match_longest(Dict* dict, - const ucs4_t* word, - size_t maxlen, - size_t* match_length); - -size_t dict_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length); - -#endif /* __OPENCC_DICTIONARY_ABSTRACT_H_ */ diff --git a/src/dict_chain.c b/src/dict_chain.c deleted file mode 100644 index de16991..0000000 --- a/src/dict_chain.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "dict_group.h" -#include "dict_chain.h" - -DictChain* dict_chain_new(Config* config) { - DictChain* dict_chain = (DictChain*)malloc(sizeof(DictChain)); - dict_chain->count = 0; - dict_chain->config = config; - return dict_chain; -} - -void dict_chain_delete(DictChain* dict_chain) { - size_t i; - for (i = 0; i < dict_chain->count; i++) { - dict_group_delete(dict_chain->groups[i]); - } - free(dict_chain); -} - -DictGroup* dict_chain_add_group(DictChain* dict_chain) { - if (dict_chain->count + 1 == DICTIONARY_GROUP_MAX_COUNT) { - return (DictGroup*)-1; - } - DictGroup* group = dict_group_new(dict_chain); - dict_chain->groups[dict_chain->count++] = group; - return group; -} - -DictGroup* dict_chain_get_group(DictChain* dict_chain, size_t index) { - if (index >= dict_chain->count) { - return (DictGroup*)-1; - } - return dict_chain->groups[index]; -} diff --git a/src/dict_group.c b/src/dict_group.c deleted file mode 100644 index d8fa64c..0000000 --- a/src/dict_group.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "config_reader.h" -#include "dict_group.h" -#include "dict_chain.h" - -static dictionary_error errnum = DICTIONARY_ERROR_VOID; - -DictGroup* dict_group_new(DictChain* dict_chain) { - DictGroup* dict_group = - (DictGroup*)malloc(sizeof(DictGroup)); - dict_group->count = 0; - dict_group->dict_chain = dict_chain; - return dict_group; -} - -void dict_group_delete(DictGroup* dict_group) { - size_t i; - for (i = 0; i < dict_group->count; i++) { - dict_delete(dict_group->dicts[i]); - } - free(dict_group); -} - -static char* try_find_dictionary_with_config( - DictGroup* dict_group, - const char* filename) { - if (is_absolute_path(filename)) { - return NULL; - } - /* Get config path */ - if (dict_group->dict_chain == NULL) { - return NULL; - } - Config* config = dict_group->dict_chain->config; - if (config == NULL) { - return NULL; - } - const char* config_path = config->file_path; - if (config_path == NULL) { - return NULL; - } - char* config_path_filename = (char*)malloc(strlen(config_path) + strlen( - filename) + 2); - sprintf(config_path_filename, "%s/%s", config_path, filename); - FILE* fp = fopen(config_path_filename, "r"); - if (fp) { - fclose(fp); - return config_path_filename; - } - return NULL; -} - -int dict_group_load(DictGroup* dict_group, - const char* filename, - opencc_dictionary_type type) { - Dict* dictionary; - char* path = try_open_file(filename); - if (path == NULL) { - path = try_find_dictionary_with_config(dict_group, filename); - if (path == NULL) { - errnum = DICTIONARY_ERROR_CANNOT_ACCESS_DICTFILE; - return -1; - } - } - dictionary = dict_new(path, type); - free(path); - if (dictionary == (Dict*)-1) { - errnum = DICTIONARY_ERROR_INVALID_DICT; - return -1; - } - dict_group->dicts[dict_group->count++] = dictionary; - return 0; -} - -Dict* dict_group_get_dict(DictGroup* dict_group, size_t index) { - if (index >= dict_group->count) { - errnum = DICTIONARY_ERROR_INVALID_INDEX; - return (Dict*)-1; - } - return dict_group->dicts[index]; 
-} - -const ucs4_t* const* dict_group_match_longest( - DictGroup* dict_group, - const ucs4_t* word, - size_t maxlen, - size_t* match_length) { - if (dict_group->count == 0) { - errnum = DICTIONARY_ERROR_NODICT; - return (const ucs4_t* const*)-1; - } - const ucs4_t* const* retval = NULL; - size_t t_match_length, max_length = 0; - size_t i; - for (i = 0; i < dict_group->count; i++) { - /* 依次查找每個辭典,取得最長匹配長度 */ - const ucs4_t* const* t_retval = dict_match_longest( - dict_group->dicts[i], - word, - maxlen, - &t_match_length); - if (t_retval != NULL) { - if (t_match_length > max_length) { - max_length = t_match_length; - retval = t_retval; - } - } - } - if (match_length != NULL) { - *match_length = max_length; - } - return retval; -} - -size_t dict_group_get_all_match_lengths(DictGroup* dict_group, - const ucs4_t* word, - size_t* match_length) { - if (dict_group->count == 0) { - errnum = DICTIONARY_ERROR_NODICT; - return (size_t)-1; - } - size_t rscnt = 0; - size_t i; - for (i = 0; i < dict_group->count; i++) { - size_t retval; - retval = dict_get_all_match_lengths( - dict_group->dicts[i], - word, - match_length + rscnt - ); - rscnt += retval; - /* 去除重複長度 */ - if ((i > 0) && (rscnt > 1)) { - qsort(match_length, rscnt, sizeof(match_length[0]), qsort_int_cmp); - size_t j, k; - for (j = 0, k = 1; k < rscnt; k++) { - if (match_length[k] != match_length[j]) { - match_length[++j] = match_length[k]; - } - } - rscnt = j + 1; - } - } - return rscnt; -} - -dictionary_error dictionary_errno(void) { - return errnum; -} - -void dictionary_perror(const char* spec) { - perr(spec); - perr("\n"); - switch (errnum) { - case DICTIONARY_ERROR_VOID: - break; - case DICTIONARY_ERROR_NODICT: - perr(_("No dictionary loaded")); - break; - case DICTIONARY_ERROR_CANNOT_ACCESS_DICTFILE: - perror(_("Can not open dictionary file")); - break; - case DICTIONARY_ERROR_INVALID_DICT: - perror(_("Invalid dictionary file")); - break; - case DICTIONARY_ERROR_INVALID_INDEX: - perror(_("Invalid dictionary 
index")); - break; - default: - perr(_("Unknown")); - } -} diff --git a/src/dict_group.h b/src/dict_group.h deleted file mode 100644 index 4032e66..0000000 --- a/src/dict_group.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __DICTIONARY_GROUP_H_ -#define __DICTIONARY_GROUP_H_ - -#include "common.h" -#include "dict.h" - -typedef enum { - DICTIONARY_ERROR_VOID, - DICTIONARY_ERROR_NODICT, - DICTIONARY_ERROR_CANNOT_ACCESS_DICTFILE, - DICTIONARY_ERROR_INVALID_DICT, - DICTIONARY_ERROR_INVALID_INDEX, -} dictionary_error; - -DictGroup* dict_group_new(DictChain* t_DictChain); - -void dict_group_delete(DictGroup* dict_group); - -int dict_group_load(DictGroup* dict_group, - const char* filename, - opencc_dictionary_type type); - -const ucs4_t* const* dict_group_match_longest( - DictGroup* dict_group, - const ucs4_t* word, - size_t maxlen, - size_t* match_length); - -size_t dict_group_get_all_match_lengths(DictGroup* dict_group, - const ucs4_t* word, - size_t* match_length); - -Dict* dict_group_get_dict(DictGroup* dict_group, size_t index); - -dictionary_error dictionary_errno(void); - -void dictionary_perror(const char* spec); - -#endif /* __DICTIONARY_GROUP_H_ */ diff --git a/src/dictionary/datrie.c b/src/dictionary/datrie.c deleted file mode 100644 index 5d7a8a2..0000000 --- a/src/dictionary/datrie.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Open Chinese 
Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "datrie.h" -#include -#include - -#ifdef __WIN32 - -/* Todo: Win32 mmap*/ -#else /* ifdef __WIN32 */ -# include -# define MMAP_ENABLED -#endif /* ifdef __WIN32 */ - -typedef enum { - MEMORY_TYPE_MMAP, - MEMORY_TYPE_ALLOCATE -} memory_type; - -typedef struct { - const DatrieItem* dat; - uint32_t dat_item_count; - ucs4_t* lexicon; - uint32_t lexicon_count; - - ucs4_t*** lexicon_set; - void* dic_memory; - size_t dic_size; - memory_type dic_memory_type; -} DatrieDict; - -static int load_allocate(DatrieDict* datrie_dictionary, int fd) { - datrie_dictionary->dic_memory_type = MEMORY_TYPE_ALLOCATE; - datrie_dictionary->dic_memory = malloc(datrie_dictionary->dic_size); - - if (datrie_dictionary->dic_memory == NULL) { - /* 內存申請失敗 */ - return -1; - } - lseek(fd, 0, SEEK_SET); - - if (read(fd, datrie_dictionary->dic_memory, - datrie_dictionary->dic_size) == -1) { - /* 讀取失敗 */ - return -1; - } - return 0; -} - -static int load_mmap(DatrieDict* datrie_dictionary, int fd) { -#ifdef MMAP_ENABLED - datrie_dictionary->dic_memory_type = MEMORY_TYPE_MMAP; - datrie_dictionary->dic_memory = mmap(NULL, - datrie_dictionary->dic_size, - PROT_READ, - MAP_PRIVATE, - fd, - 0); - - if (datrie_dictionary->dic_memory == MAP_FAILED) { - /* 內存映射創建失敗 */ - datrie_dictionary->dic_memory = NULL; - return -1; - } - return 0; - -#else /* ifdef MMAP_ENABLED */ - return -1; - -#endif /* 
ifdef MMAP_ENABLED */ -} - -static int load_dict(DatrieDict* datrie_dictionary, FILE* fp) { - int fd = fileno(fp); - - fseek(fp, 0, SEEK_END); - datrie_dictionary->dic_size = ftell(fp); - - /* 首先嘗試mmap,如果失敗嘗試申請內存 */ - if (load_mmap(datrie_dictionary, fd) == -1) { - if (load_allocate(datrie_dictionary, fd) == -1) { - return -1; - } - } - - size_t header_len = strlen("OPENCCDATRIE"); - - if (strncmp((const char*)datrie_dictionary->dic_memory, "OPENCCDATRIE", - header_len) != 0) { - return -1; - } - - size_t offset = 0; - - offset += header_len * sizeof(char); - - /* 詞彙表 */ - uint32_t lexicon_length = - *((uint32_t*)(datrie_dictionary->dic_memory + offset)); - offset += sizeof(uint32_t); - - datrie_dictionary->lexicon = (ucs4_t*)(datrie_dictionary->dic_memory + offset); - offset += lexicon_length * sizeof(ucs4_t); - - /* 詞彙索引表 */ - uint32_t lexicon_index_length = - *((uint32_t*)(datrie_dictionary->dic_memory + offset)); - offset += sizeof(uint32_t); - - uint32_t* lexicon_index = (uint32_t*)(datrie_dictionary->dic_memory + offset); - offset += lexicon_index_length * sizeof(uint32_t); - - datrie_dictionary->lexicon_count = - *((uint32_t*)(datrie_dictionary->dic_memory + offset)); - offset += sizeof(uint32_t); - - datrie_dictionary->dat_item_count = - *((uint32_t*)(datrie_dictionary->dic_memory + offset)); - offset += sizeof(uint32_t); - - datrie_dictionary->dat = - (DatrieItem*)(datrie_dictionary->dic_memory + offset); - - /* 構造索引表 */ - datrie_dictionary->lexicon_set = (ucs4_t***)malloc( - datrie_dictionary->lexicon_count * sizeof(ucs4_t * *)); - size_t i, last = 0; - - for (i = 0; i < datrie_dictionary->lexicon_count; i++) { - size_t count, j; - - for (j = last; j < lexicon_index_length; j++) { - if (lexicon_index[j] == (uint32_t)-1) { - break; - } - } - count = j - last; - - datrie_dictionary->lexicon_set[i] = - (ucs4_t**)malloc((count + 1) * sizeof(ucs4_t*)); - - for (j = 0; j < count; j++) { - datrie_dictionary->lexicon_set[i][j] = - datrie_dictionary->lexicon + 
lexicon_index[last + j]; - } - datrie_dictionary->lexicon_set[i][count] = NULL; - last += j + 1; - } - - return 0; -} - -static int unload_dict(DatrieDict* datrie_dictionary) { - if (datrie_dictionary->dic_memory != NULL) { - size_t i; - - for (i = 0; i < datrie_dictionary->lexicon_count; i++) { - free(datrie_dictionary->lexicon_set[i]); - } - free(datrie_dictionary->lexicon_set); - - if (MEMORY_TYPE_MMAP == datrie_dictionary->dic_memory_type) { - #ifdef MMAP_ENABLED - return munmap(datrie_dictionary->dic_memory, datrie_dictionary->dic_size); - - #else /* ifdef MMAP_ENABLED */ - debug_should_not_be_here(); - #endif /* ifdef MMAP_ENABLED */ - } else if (MEMORY_TYPE_ALLOCATE == datrie_dictionary->dic_memory_type) { - free(datrie_dictionary->dic_memory); - } else { - return -1; - } - } - return 0; -} - -Dict* dict_datrie_new(const char* filename) { - DatrieDict* datrie_dictionary = (DatrieDict*)malloc( - sizeof(DatrieDict)); - - datrie_dictionary->dat = NULL; - datrie_dictionary->lexicon = NULL; - - FILE* fp = fopen(filename, "rb"); - - if (load_dict(datrie_dictionary, fp) == -1) { - dict_datrie_delete((Dict*)datrie_dictionary); - return (Dict*)-1; - } - - fclose(fp); - - return (Dict*)datrie_dictionary; -} - -int dict_datrie_delete(Dict* dict) { - DatrieDict* datrie_dictionary = - (DatrieDict*)dict; - - if (unload_dict(datrie_dictionary) == -1) { - free(datrie_dictionary); - return -1; - } - - free(datrie_dictionary); - return 0; -} - -int encode_char(ucs4_t ch) { - return (int)ch; -} - -void datrie_match(const DatrieDict* datrie_dictionary, - const ucs4_t* word, - size_t* match_pos, - size_t* id, - size_t limit) { - int i, p; - - for (i = 0, p = 0; word[p] && (limit == 0 || (size_t)p < limit) && - datrie_dictionary->dat[i].base != DATRIE_UNUSED; p++) { - int k = encode_char(word[p]); - int j = datrie_dictionary->dat[i].base + k; - - if ((j < 0) || ((size_t)j >= datrie_dictionary->dat_item_count) || - (datrie_dictionary->dat[j].parent != i)) { - break; - } - i = j; - 
} - - if (match_pos) { - *match_pos = p; - } - - if (id) { - *id = i; - } -} - -const ucs4_t* const* dict_datrie_match_longest(Dict* dict, - const ucs4_t* word, - size_t maxlen, - size_t* match_length) { - DatrieDict* datrie_dictionary = - (DatrieDict*)dict; - - size_t pos, item; - - datrie_match(datrie_dictionary, word, &pos, &item, maxlen); - - while (datrie_dictionary->dat[item].word == -1 && pos > 1) { - datrie_match(datrie_dictionary, word, &pos, &item, pos - 1); - } - - if ((pos == 0) || (datrie_dictionary->dat[item].word == -1)) { - if (match_length != NULL) { - *match_length = 0; - } - return NULL; - } - - if (match_length != NULL) { - *match_length = pos; - } - - return (const ucs4_t* const*) - datrie_dictionary->lexicon_set[datrie_dictionary->dat[item].word]; -} - -size_t dict_datrie_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length) { - DatrieDict* datrie_dictionary = - (DatrieDict*)dict; - - size_t rscnt = 0; - - int i, p; - - for (i = 0, p = 0; word[p] && datrie_dictionary->dat[i].base != DATRIE_UNUSED; - p++) { - int k = encode_char(word[p]); - int j = datrie_dictionary->dat[i].base + k; - - if ((j < 0) || ((size_t)j >= datrie_dictionary->dat_item_count) || - (datrie_dictionary->dat[j].parent != i)) { - break; - } - i = j; - - if (datrie_dictionary->dat[i].word != -1) { - match_length[rscnt++] = p + 1; - } - } - - return rscnt; -} diff --git a/src/dictionary/datrie.h b/src/dictionary/datrie.h deleted file mode 100644 index 4f330ea..0000000 --- a/src/dictionary/datrie.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCC_DICTIONARY_DATRIE_H_ -#define __OPENCC_DICTIONARY_DATRIE_H_ - -#include "../dict.h" - -#define DATRIE_UNUSED -1 - -typedef struct { - int base; - int parent; - int word; -} DatrieItem; - -Dict* dict_datrie_new(const char* filename); - -int dict_datrie_delete(Dict* dict); - -const ucs4_t* const* dict_datrie_match_longest(Dict* dict, - const ucs4_t* word, - size_t maxlen, - size_t* match_length); - -size_t dict_datrie_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length); - -int encode_char(ucs4_t ch); - -#endif /* __OPENCC_DICTIONARY_DATRIE_H_ */ diff --git a/src/dictionary/text.c b/src/dictionary/text.c deleted file mode 100644 index 3263a41..0000000 --- a/src/dictionary/text.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../encoding.h" -#include "text.h" - -#define INITIAL_DICTIONARY_SIZE 1024 -#define ENTRY_BUFF_SIZE 128 -#define ENTRY_WBUFF_SIZE ENTRY_BUFF_SIZE / sizeof(size_t) - -int qsort_entry_cmp(const void* a, const void* b) { - return ucs4cmp(((TextEntry*)a)->key, ((TextEntry*)b)->key); -} - -int parse_entry(const char* buff, TextEntry* entry_i) { - size_t length; - const char* pbuff; - - /* 解析鍵 */ - for (pbuff = buff; *pbuff != '\t' && *pbuff != '\0'; ++pbuff) {} - - if (*pbuff == '\0') { - return -1; - } - length = pbuff - buff; - - ucs4_t* ucs4_buff; - ucs4_buff = utf8_to_ucs4(buff, length); - - if (ucs4_buff == (ucs4_t*)-1) { - return -1; - } - entry_i->key = (ucs4_t*)malloc((length + 1) * sizeof(ucs4_t)); - ucs4cpy(entry_i->key, ucs4_buff); - free(ucs4_buff); - - /* 解析值 */ - size_t value_i, value_count = INITIAL_DICTIONARY_SIZE; - entry_i->value = (ucs4_t**)malloc(value_count * sizeof(ucs4_t*)); - - for (value_i = 0; *pbuff != '\0' && *pbuff != '\n'; ++value_i) { - if (value_i >= value_count) { - value_count += value_count; - entry_i->value = (ucs4_t**)realloc( - entry_i->value, - value_count * sizeof(ucs4_t*) - ); - } - - for (buff = ++pbuff; - *pbuff != ' ' && *pbuff != '\0' && *pbuff != '\n' && *pbuff != '\r'; - ++pbuff) {} - length = pbuff - buff; - ucs4_buff = utf8_to_ucs4(buff, length); - - if (ucs4_buff == (ucs4_t*)-1) { - /* 發生錯誤 回退內存申請 */ - ssize_t i; - - for (i = value_i - 1; i >= 0; --i) { - free(entry_i->value[i]); - } - free(entry_i->value); - free(entry_i->key); - return -1; - } - - entry_i->value[value_i] = (ucs4_t*)malloc((length + 1) * sizeof(ucs4_t)); - ucs4cpy(entry_i->value[value_i], ucs4_buff); - free(ucs4_buff); - } - - entry_i->value = (ucs4_t**)realloc( - entry_i->value, - value_count * sizeof(ucs4_t*) - ); - entry_i->value[value_i] = NULL; - - return 0; -} - -Dict* dict_text_new(const char* filename) { - TextDict* text_dictionary; - - text_dictionary = (TextDict*)malloc(sizeof(TextDict)); - text_dictionary->entry_count = 
INITIAL_DICTIONARY_SIZE; - text_dictionary->max_length = 0; - text_dictionary->lexicon = (TextEntry*)malloc( - sizeof(TextEntry) * text_dictionary->entry_count); - text_dictionary->word_buff = NULL; - - static char buff[ENTRY_BUFF_SIZE]; - - FILE* fp = fopen(filename, "r"); - - if (fp == NULL) { - dict_text_delete((Dict*)text_dictionary); - return (Dict*)-1; - } - skip_utf8_bom(fp); - - size_t i = 0; - - while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { - if (i >= text_dictionary->entry_count) { - text_dictionary->entry_count += text_dictionary->entry_count; - text_dictionary->lexicon = (TextEntry*)realloc( - text_dictionary->lexicon, - sizeof(TextEntry) * text_dictionary->entry_count - ); - } - - if (parse_entry(buff, text_dictionary->lexicon + i) == -1) { - text_dictionary->entry_count = i; - dict_text_delete((Dict*)text_dictionary); - return (Dict*)-1; - } - - size_t length = ucs4len(text_dictionary->lexicon[i].key); - - if (length > text_dictionary->max_length) { - text_dictionary->max_length = length; - } - - i++; - } - - fclose(fp); - - text_dictionary->entry_count = i; - text_dictionary->lexicon = (TextEntry*)realloc( - text_dictionary->lexicon, - sizeof(TextEntry) * text_dictionary->entry_count - ); - text_dictionary->word_buff = (ucs4_t*) - malloc(sizeof(ucs4_t) * - (text_dictionary->max_length + 1)); - - qsort(text_dictionary->lexicon, - text_dictionary->entry_count, - sizeof(text_dictionary->lexicon[0]), - qsort_entry_cmp - ); - - return (Dict*)text_dictionary; -} - -void dict_text_delete(Dict* dict) { - TextDict* text_dictionary = (TextDict*)dict; - - size_t i; - - for (i = 0; i < text_dictionary->entry_count; ++i) { - free(text_dictionary->lexicon[i].key); - - ucs4_t** j; - - for (j = text_dictionary->lexicon[i].value; *j; ++j) { - free(*j); - } - free(text_dictionary->lexicon[i].value); - } - - free(text_dictionary->lexicon); - free(text_dictionary->word_buff); - free(text_dictionary); -} - -const ucs4_t* const* dict_text_match_longest(Dict* dict, - const 
ucs4_t* word, - size_t maxlen, - size_t* match_length) { - TextDict* text_dictionary = (TextDict*)dict; - - if (text_dictionary->entry_count == 0) { - return NULL; - } - - if (maxlen == 0) { - maxlen = ucs4len(word); - } - size_t len = text_dictionary->max_length; - - if (maxlen < len) { - len = maxlen; - } - - ucs4ncpy(text_dictionary->word_buff, word, len); - text_dictionary->word_buff[len] = L'\0'; - - TextEntry buff; - buff.key = text_dictionary->word_buff; - - for (; len > 0; len--) { - text_dictionary->word_buff[len] = L'\0'; - TextEntry* brs = (TextEntry*)bsearch( - &buff, - text_dictionary->lexicon, - text_dictionary->entry_count, - sizeof(text_dictionary->lexicon[0]), - qsort_entry_cmp - ); - - if (brs != NULL) { - if (match_length != NULL) { - *match_length = len; - } - return (const ucs4_t* const*)brs->value; - } - } - - if (match_length != NULL) { - *match_length = 0; - } - return NULL; -} - -size_t dict_text_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length) { - TextDict* text_dictionary = (TextDict*)dict; - - size_t rscnt = 0; - - if (text_dictionary->entry_count == 0) { - return rscnt; - } - - size_t length = ucs4len(word); - size_t len = text_dictionary->max_length; - - if (length < len) { - len = length; - } - - ucs4ncpy(text_dictionary->word_buff, word, len); - text_dictionary->word_buff[len] = L'\0'; - - TextEntry buff; - buff.key = text_dictionary->word_buff; - - for (; len > 0; len--) { - text_dictionary->word_buff[len] = L'\0'; - TextEntry* brs = (TextEntry*)bsearch( - &buff, - text_dictionary->lexicon, - text_dictionary->entry_count, - sizeof(text_dictionary->lexicon[0]), - qsort_entry_cmp - ); - - if (brs != NULL) { - match_length[rscnt++] = len; - } - } - - return rscnt; -} - -size_t dict_text_get_lexicon(Dict* dict, TextEntry* lexicon) { - TextDict* text_dictionary = (TextDict*)dict; - - size_t i; - - for (i = 0; i < text_dictionary->entry_count; i++) { - lexicon[i].key = text_dictionary->lexicon[i].key; - 
lexicon[i].value = text_dictionary->lexicon[i].value; - } - - return text_dictionary->entry_count; -} diff --git a/src/dictionary/text.h b/src/dictionary/text.h deleted file mode 100644 index 7519f9f..0000000 --- a/src/dictionary/text.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCC_DICTIONARY_TEXT_H_ -#define __OPENCC_DICTIONARY_TEXT_H_ - -#include "../dict.h" - -typedef struct { - ucs4_t* key; - ucs4_t** value; -} TextEntry; - -typedef struct { - size_t entry_count; - size_t max_length; - TextEntry* lexicon; - ucs4_t* word_buff; -} TextDict; - -Dict* dict_text_new(const char* filename); - -void dict_text_delete(Dict* dict); - -const ucs4_t* const* dict_text_match_longest(Dict* dict, - const ucs4_t* word, - size_t maxlen, - size_t* match_length); - -size_t dict_text_get_all_match_lengths(Dict* dict, - const ucs4_t* word, - size_t* match_length); - -size_t dict_text_get_lexicon(Dict* dict, TextEntry* lexicon); - -#endif /* __OPENCC_DICTIONARY_TEXT_H_ */ diff --git a/src/encoding.c b/src/encoding.c deleted file mode 100644 index f32224b..0000000 --- a/src/encoding.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "encoding.h" -#include "opencc.h" - -#define INITIAL_BUFF_SIZE 1024 -#define GET_BIT(byte, pos) (((byte) >> (pos))& 1) -#define BITMASK(length) ((1 << length) - 1) - -ucs4_t* utf8_to_ucs4(const char* utf8, size_t length) { - if (length == 0) { - length = (size_t)-1; - } - size_t i; - for (i = 0; i < length && utf8[i] != '\0'; i++) {} - length = i; - size_t freesize = INITIAL_BUFF_SIZE; - ucs4_t* ucs4 = (ucs4_t*)malloc(sizeof(ucs4_t) * freesize); - ucs4_t* pucs4 = ucs4; - for (i = 0; i < length; i++) { - ucs4_t byte[4] = { 0 }; - if (GET_BIT(utf8[i], 7) == 0) { - /* U-00000000 - U-0000007F */ - /* 0xxxxxxx */ - byte[0] = utf8[i] & BITMASK(7); - } else if (GET_BIT(utf8[i], 5) == 0) { - /* U-00000080 - U-000007FF */ - /* 110xxxxx 10xxxxxx */ - if (i + 1 >= length) { - goto err; - } - byte[0] = (utf8[i + 1] & BITMASK(6)) + - ((utf8[i] & BITMASK(2)) << 6); - byte[1] = (utf8[i] >> 2) & BITMASK(3); - i += 1; - } else if (GET_BIT(utf8[i], 4) == 0) { - /* U-00000800 - U-0000FFFF */ - /* 1110xxxx 10xxxxxx 10xxxxxx */ - if (i + 2 >= length) { - goto err; - } - byte[0] = (utf8[i + 2] & BITMASK(6)) + - ((utf8[i + 1] & BITMASK(2)) << 6); - byte[1] = ((utf8[i + 1] >> 2) & BITMASK(4)) - + ((utf8[i] & BITMASK(4)) << 4); - i += 2; - } else if (GET_BIT(utf8[i], 3) == 0) { - /* U-00010000 - U-001FFFFF */ - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - if (i + 3 >= length) { - goto err; - } - byte[0] = (utf8[i + 3] & BITMASK(6)) + - ((utf8[i + 2] & BITMASK(2)) << 6); - byte[1] = ((utf8[i + 2] >> 2) & BITMASK(4)) + - ((utf8[i + 1] & BITMASK(4)) << 4); - 
byte[2] = ((utf8[i + 1] >> 4) & BITMASK(2)) + - ((utf8[i] & BITMASK(3)) << 2); - i += 3; - } else if (GET_BIT(utf8[i], 2) == 0) { - /* U-00200000 - U-03FFFFFF */ - /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - if (i + 4 >= length) { - goto err; - } - byte[0] = (utf8[i + 4] & BITMASK(6)) + - ((utf8[i + 3] & BITMASK(2)) << 6); - byte[1] = ((utf8[i + 3] >> 2) & BITMASK(4)) + - ((utf8[i + 2] & BITMASK(4)) << 4); - byte[2] = ((utf8[i + 2] >> 4) & BITMASK(2)) + - ((utf8[i + 1] & BITMASK(6)) << 2); - byte[3] = utf8[i] & BITMASK(2); - i += 4; - } else if (GET_BIT(utf8[i], 1) == 0) { - /* U-04000000 - U-7FFFFFFF */ - /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - if (i + 5 >= length) { - goto err; - } - byte[0] = (utf8[i + 5] & BITMASK(6)) + - ((utf8[i + 4] & BITMASK(2)) << 6); - byte[1] = ((utf8[i + 4] >> 2) & BITMASK(4)) + - ((utf8[i + 3] & BITMASK(4)) << 4); - byte[2] = ((utf8[i + 3] >> 4) & BITMASK(2)) + - ((utf8[i + 2] & BITMASK(6)) << 2); - byte[3] = (utf8[i + 1] & BITMASK(6)) + - ((utf8[i] & BITMASK(1)) << 6); - i += 5; - } else { - goto err; - } - if (freesize == 0) { - freesize = pucs4 - ucs4; - ucs4 = (ucs4_t*)realloc(ucs4, sizeof(ucs4_t) * (freesize + freesize)); - pucs4 = ucs4 + freesize; - } - *pucs4 = (byte[3] << 24) + (byte[2] << 16) + (byte[1] << 8) + byte[0]; - pucs4++; - freesize--; - } - length = (pucs4 - ucs4 + 1); - ucs4 = (ucs4_t*)realloc(ucs4, sizeof(ucs4_t) * length); - ucs4[length - 1] = 0; - return ucs4; - -err: - free(ucs4); - return (ucs4_t*)-1; -} - -char* ucs4_to_utf8(const ucs4_t* ucs4, size_t length) { - if (length == 0) { - length = (size_t)-1; - } - size_t i; - for (i = 0; i < length && ucs4[i] != 0; i++) {} - length = i; - size_t freesize = INITIAL_BUFF_SIZE; - char* utf8 = (char*)malloc(sizeof(char) * freesize); - char* putf8 = utf8; - for (i = 0; i < length; i++) { - if ((ssize_t)freesize - 6 <= 0) { - freesize = putf8 - utf8; - utf8 = (char*)realloc(utf8, sizeof(char) * (freesize + freesize)); - putf8 = utf8 + 
freesize; - } - ucs4_t c = ucs4[i]; - ucs4_t byte[4] = { - (c >> 0) & BITMASK(8), (c >> 8) & BITMASK(8), - (c >> 16) & BITMASK(8), (c >> 24) & BITMASK(8) - }; - size_t delta = 0; - if (c <= 0x7F) { - /* U-00000000 - U-0000007F */ - /* 0xxxxxxx */ - putf8[0] = byte[0] & BITMASK(7); - delta = 1; - } else if (c <= 0x7FF) { - /* U-00000080 - U-000007FF */ - /* 110xxxxx 10xxxxxx */ - putf8[1] = 0x80 + (byte[0] & BITMASK(6)); - putf8[0] = 0xC0 + ((byte[0] >> 6) & BITMASK(2)) + - ((byte[1] & BITMASK(3)) << 2); - delta = 2; - } else if (c <= 0xFFFF) { - /* U-00000800 - U-0000FFFF */ - /* 1110xxxx 10xxxxxx 10xxxxxx */ - putf8[2] = 0x80 + (byte[0] & BITMASK(6)); - putf8[1] = 0x80 + ((byte[0] >> 6) & BITMASK(2)) + - ((byte[1] & BITMASK(4)) << 2); - putf8[0] = 0xE0 + ((byte[1] >> 4) & BITMASK(4)); - delta = 3; - } else if (c <= 0x1FFFFF) { - /* U-00010000 - U-001FFFFF */ - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - putf8[3] = 0x80 + (byte[0] & BITMASK(6)); - putf8[2] = 0x80 + ((byte[0] >> 6) & BITMASK(2)) + - ((byte[1] & BITMASK(4)) << 2); - putf8[1] = 0x80 + ((byte[1] >> 4) & BITMASK(4)) + - ((byte[2] & BITMASK(2)) << 4); - putf8[0] = 0xF0 + ((byte[2] >> 2) & BITMASK(3)); - delta = 4; - } else if (c <= 0x3FFFFFF) { - /* U-00200000 - U-03FFFFFF */ - /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - putf8[4] = 0x80 + (byte[0] & BITMASK(6)); - putf8[3] = 0x80 + ((byte[0] >> 6) & BITMASK(2)) + - ((byte[1] & BITMASK(4)) << 2); - putf8[2] = 0x80 + ((byte[1] >> 4) & BITMASK(4)) + - ((byte[2] & BITMASK(2)) << 4); - putf8[1] = 0x80 + ((byte[2] >> 2) & BITMASK(6)); - putf8[0] = 0xF8 + (byte[3] & BITMASK(2)); - delta = 5; - } else if (c <= 0x7FFFFFFF) { - /* U-04000000 - U-7FFFFFFF */ - /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - putf8[5] = 0x80 + (byte[0] & BITMASK(6)); - putf8[4] = 0x80 + ((byte[0] >> 6) & BITMASK(2)) + - ((byte[1] & BITMASK(4)) << 2); - putf8[3] = 0x80 + ((byte[1] >> 4) & BITMASK(4)) + - ((byte[2] & BITMASK(2)) << 4); - putf8[2] = 0x80 + 
((byte[2] >> 2) & BITMASK(6)); - putf8[1] = 0x80 + (byte[3] & BITMASK(6)); - putf8[0] = 0xFC + ((byte[3] >> 6) & BITMASK(1)); - delta = 6; - } else { - free(utf8); - return (char*)-1; - } - putf8 += delta; - freesize -= delta; - } - length = (putf8 - utf8 + 1); - utf8 = (char*)realloc(utf8, sizeof(char) * length); - utf8[length - 1] = '\0'; - return utf8; -} - -size_t ucs4len(const ucs4_t* str) { - const register ucs4_t* pstr = str; - while (*pstr) { - ++pstr; - } - return pstr - str; -} - -int ucs4cmp(const ucs4_t* src, const ucs4_t* dst) { - register int ret = 0; - while (!(ret = *src - *dst) && *dst) { - ++src, ++dst; - } - return ret; -} - -void ucs4cpy(ucs4_t* dest, const ucs4_t* src) { - while (*src) { - *dest++ = *src++; - } - *dest = 0; -} - -void ucs4ncpy(ucs4_t* dest, const ucs4_t* src, size_t len) { - while (*src && len-- > 0) { - *dest++ = *src++; - } -} diff --git a/src/encoding.h b/src/encoding.h deleted file mode 100644 index b9d19d9..0000000 --- a/src/encoding.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file - * UCS4-UTF8 Encoding module. - * - * @license - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCC_ENCODING_H_ -#define __OPENCC_ENCODING_H_ - -#include "common.h" - -/** - * Converts a UTF-8 string into UCS-4. - * - * @param utf8 UTF-8 string - * @param length Length of UTF-8 string or 0 to consider as \0 ended string - * @return The converted UCS-4 string. 
Must be free when not in use. - */ -ucs4_t* utf8_to_ucs4(const char* utf8, size_t length); - -/** - * Converts a UCS-4 string into UTF-8. - * - * @param ucs4 UCS-4 string - * @param length Length of UCS-4 string or 0 to consider as \0 ended string - * @return The converted UTF-8 string. Must be free when not in use. - */ -char* ucs4_to_utf8(const ucs4_t* ucs4, size_t length); - -size_t ucs4len(const ucs4_t* str); - -int ucs4cmp(const ucs4_t* str1, const ucs4_t* str2); - -void ucs4cpy(ucs4_t* dest, const ucs4_t* src); - -void ucs4ncpy(ucs4_t* dest, const ucs4_t* src, size_t len); - -#endif /* __OPENCC_ENCODING_H_ */ diff --git a/src/opencc.c b/src/opencc.c deleted file mode 100644 index af2cd68..0000000 --- a/src/opencc.c +++ /dev/null @@ -1,245 +0,0 @@ -/** - * @file - * OpenCC API. - * - * @license - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "common.h" -#include "config_reader.h" -#include "converter.h" -#include "dict_group.h" -#include "dict_chain.h" -#include "encoding.h" -#include "opencc.h" - -typedef struct { - DictChain* dict_chain; - Converter* converter; -} OpenccDesc; - -static opencc_error errnum = OPENCC_ERROR_VOID; -static int lib_initialized = 0; - -static void lib_initialize(void) { -#ifdef ENABLE_GETTEXT - bindtextdomain(PACKAGE_NAME, LOCALEDIR); -#endif /* ifdef ENABLE_GETTEXT */ - lib_initialized = 1; -} - -size_t opencc_convert(opencc_t t_opencc, - ucs4_t** inbuf, - size_t* inbuf_left, - ucs4_t** outbuf, - size_t* outbuf_left) { - if (!lib_initialized) { - lib_initialize(); - } - OpenccDesc* opencc = (OpenccDesc*)t_opencc; - size_t retval = converter_convert(opencc->converter, - inbuf, - inbuf_left, - outbuf, - outbuf_left); - if (retval == (size_t)-1) { - errnum = OPENCC_ERROR_CONVERTER; - } - return retval; -} - -char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) { - if (!lib_initialized) { - lib_initialize(); - } - size_t actual_length = strlen(inbuf); - if ((length == (size_t)-1) || (length > actual_length)) { - length = actual_length; - } - ucs4_t* winbuf = utf8_to_ucs4(inbuf, length); - if (winbuf == (ucs4_t*)-1) { - /* Can not convert input UTF8 to UCS4 */ - errnum = OPENCC_ERROR_ENCODING; - return (char*)-1; - } - /* Set up UTF8 buffer */ - size_t outbuf_len = length; - size_t outsize = outbuf_len; - char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1)); - char* outbuf = original_outbuf; - original_outbuf[0] = '\0'; - /* Set conversion buffer */ - size_t wbufsize = length + 64; - ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1)); - ucs4_t* pinbuf = winbuf; - ucs4_t* poutbuf = woutbuf; - size_t inbuf_left, outbuf_left; - inbuf_left = ucs4len(winbuf); - outbuf_left = wbufsize; - while (inbuf_left > 0) { - size_t retval = opencc_convert(t_opencc, - &pinbuf, - &inbuf_left, - &poutbuf, - &outbuf_left); 
- if (retval == (size_t)-1) { - free(outbuf); - free(winbuf); - free(woutbuf); - return (char*)-1; - } - *poutbuf = L'\0'; - char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1); - if (ubuff == (char*)-1) { - free(outbuf); - free(winbuf); - free(woutbuf); - errnum = OPENCC_ERROR_ENCODING; - return (char*)-1; - } - size_t ubuff_len = strlen(ubuff); - while (ubuff_len > outsize) { - size_t outbuf_offset = outbuf - original_outbuf; - outsize += outbuf_len; - outbuf_len += outbuf_len; - original_outbuf = - (char*)realloc(original_outbuf, sizeof(char) * outbuf_len); - outbuf = original_outbuf + outbuf_offset; - } - strncpy(outbuf, ubuff, ubuff_len); - free(ubuff); - outbuf += ubuff_len; - *outbuf = '\0'; - outbuf_left = wbufsize; - poutbuf = woutbuf; - } - free(winbuf); - free(woutbuf); - original_outbuf = (char*)realloc(original_outbuf, - sizeof(char) * (strlen(original_outbuf) + 1)); - return original_outbuf; -} - -void opencc_convert_utf8_free(char* buf) { - free(buf); -} - -opencc_t opencc_open(const char* config_file) { - if (!lib_initialized) { - lib_initialize(); - } - OpenccDesc* opencc; - opencc = (OpenccDesc*)malloc(sizeof(OpenccDesc)); - opencc->dict_chain = NULL; - opencc->converter = converter_open(); - converter_set_conversion_mode(opencc->converter, OPENCC_CONVERSION_FAST); - if (config_file == NULL) { - /* TODO load default */ - assert(0); - } else { - /* Load config */ - Config* config = config_open(config_file); - if (config == (Config*)-1) { - errnum = OPENCC_ERROR_CONFIG; - return (opencc_t)-1; - } - opencc->dict_chain = config_get_dict_chain(config); - converter_assign_dictionary(opencc->converter, opencc->dict_chain); - config_close(config); - } - return (opencc_t)opencc; -} - -int opencc_close(opencc_t t_opencc) { - if (!lib_initialized) { - lib_initialize(); - } - OpenccDesc* opencc = (OpenccDesc*)t_opencc; - converter_close(opencc->converter); - if (opencc->dict_chain != NULL) { - dict_chain_delete(opencc->dict_chain); - } - free(opencc); - return 0; 
-} - -int opencc_dict_load(opencc_t t_opencc, - const char* dict_filename, - opencc_dictionary_type dict_type) { - if (!lib_initialized) { - lib_initialize(); - } - OpenccDesc* opencc = (OpenccDesc*)t_opencc; - DictGroup* DictGroup; - if (opencc->dict_chain == NULL) { - opencc->dict_chain = dict_chain_new(NULL); - DictGroup = dict_chain_add_group(opencc->dict_chain); - } else { - DictGroup = dict_chain_get_group(opencc->dict_chain, 0); - } - int retval = dict_group_load(DictGroup, dict_filename, dict_type); - if (retval == -1) { - errnum = OPENCC_ERROR_DICTLOAD; - return -1; - } - converter_assign_dictionary(opencc->converter, opencc->dict_chain); - return retval; -} - -void opencc_set_conversion_mode(opencc_t t_opencc, - opencc_conversion_mode conversion_mode) { - if (!lib_initialized) { - lib_initialize(); - } - OpenccDesc* opencc = (OpenccDesc*)t_opencc; - converter_set_conversion_mode(opencc->converter, conversion_mode); -} - -opencc_error opencc_errno(void) { - if (!lib_initialized) { - lib_initialize(); - } - return errnum; -} - -void opencc_perror(const char* spec) { - if (!lib_initialized) { - lib_initialize(); - } - perr(spec); - perr("\n"); - switch (errnum) { - case OPENCC_ERROR_VOID: - break; - case OPENCC_ERROR_DICTLOAD: - dictionary_perror(_("Dictionary loading error")); - break; - case OPENCC_ERROR_CONFIG: - config_perror(_("Configuration error")); - break; - case OPENCC_ERROR_CONVERTER: - converter_perror(_("Converter error")); - break; - case OPENCC_ERROR_ENCODING: - perr(_("Encoding error")); - break; - default: - perr(_("Unknown")); - } - perr("\n"); -} diff --git a/src/opencc.h b/src/opencc.h index db81288..1b2e2e9 100644 --- a/src/opencc.h +++ b/src/opencc.h @@ -1,11 +1,7 @@ -/** - * @file - * OpenCC API. - * - * @license +/* * Open Chinese Convert * - * Copyright 2010-2013 BYVoid + * Copyright 2010-2014 BYVoid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,144 +19,196 @@ #ifndef __OPENCC_H_ #define __OPENCC_H_ -/** - * @defgroup opencc_api OpenCC API - * - * API in C language - */ +#ifdef __cplusplus -#include "opencc_types.h" +#include +#include "Export.hpp" -#ifdef __cplusplus extern "C" { +#else +#include #endif -/** - * Filename of default Simplified to Traditional configuration. - * - * @ingroup opencc_api - */ -#define OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD "zhs2zht.ini" +#ifndef OPENCC_EXPORT +#define OPENCC_EXPORT +#endif /** - * Filename of default Traditional to Simplified configuration. - * - * @ingroup opencc_api - */ -#define OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP "zht2zhs.ini" +* @defgroup opencc_c_api OpenCC C API +* +* API in C language +*/ /** - * Makes an instance of opencc. - * Leave config_file to NULL if you do not want to load any configuration file. - * - * @param config_file Location of configuration file. - * @return A description pointer of the newly allocated instance of - * opencc. On error the return value will be (opencc_t) -1. - * @ingroup opencc_api - */ -opencc_t opencc_open(const char* config_file); +* Filename of default Simplified to Traditional configuration +* +* @ingroup opencc_c_api +*/ +#define OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD "s2t.json" /** - * Destroys an instance of opencc. - * - * @param od The description pointer. - * @return 0 on success or non-zero number on failure. - */ -int opencc_close(opencc_t od); +* Filename of default Traditional to Simplified configuration +* +* @ingroup opencc_c_api +*/ +#define OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP "t2s.json" /** - * Converts a UCS-4 string from *inbuf to *outbuf. - * Do not forget to assign **outbuf to L'\0' after called if you want to use it - * as a C-Style string. - * - * @param od The opencc description pointer. - * @param inbuf The pointer to the UCS-4 string. - * @param inbufleft The maximum number of characters in *inbuf to be converted. - * @param outbuf The pointer to the output buffer. 
- * @param outbufleft The size of output buffer. - * - * @return The number of characters in the input buffer that has been - * converted. - * @ingroup opencc_api - */ -size_t opencc_convert(opencc_t od, - ucs4_t** inbuf, - size_t* inbufleft, - ucs4_t** outbuf, - size_t* outbufleft); +* Type of opencc descriptor +* +* @ingroup opencc_c_api +*/ +typedef void* opencc_t; /** - * Converts UTF-8 string from inbuf. - * This function returns an allocated C-Style string via malloc(), which stores - * the converted string. - * You should call opencc_convert_utf8_free() to release allocated memory. - * - * @param od The opencc description pointer. - * @param inbuf The UTF-8 encoded string. - * @param length The maximum length of inbuf to convert. If length is set to -1, - * the whole c-style string in inbuf will be converted. - * - * @return The newly allocated UTF-8 string that stores text converted - * from inbuf. - * @ingroup opencc_api - */ -char* opencc_convert_utf8(opencc_t od, const char* inbuf, size_t length); +* Makes an instance of opencc +* +* @param configFileName Location of configuration file. If this is set to NULL, +* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded. +* @return A description pointer of the newly allocated instance of +* opencc. On error the return value will be (opencc_t) -1. +* @ingroup opencc_c_api +*/ +OPENCC_EXPORT opencc_t opencc_open(const char* configFileName); /** - * Releases allocated buffer by opencc_convert_utf8. - * - * @param buf Pointer to the allocated string buffer by opencc_convert_utf8. - * - * @ingroup opencc_api - */ -void opencc_convert_utf8_free(char* buf); +* Destroys an instance of opencc +* +* @param opencc The description pointer. +* @return 0 on success or non-zero number on failure. +* @ingroup opencc_c_api +*/ +OPENCC_EXPORT int opencc_close(opencc_t opencc); /** - * Loads a dictionary to default dictionary chain. - * - * @param od The opencc description pointer. 
- * @param dict_filename The name (or location) of the dictionary file. - * @param dict_type The type of the dictionary. - * - * @return 0 on success or non-zero number on failure. - * - * @ingroup opencc_api - * @deprecated This function is not recommended to use and will be removed. - */ -int opencc_dict_load(opencc_t od, - const char* dict_filename, - opencc_dictionary_type dict_type); +* Converts UTF-8 string +* +* @param opencc The opencc description pointer. +* @param input The UTF-8 encoded string. +* @param length The maximum length in byte to convert. If length is (size_t)-1, +* the whole string (terminated by '\0') will be converted. +* @param output The buffer to store converted text. You MUST make sure this +* buffer has sufficient space. +* +* @return The length of converted string or (size_t)-1 on error. +* +* @ingroup opencc_c_api +*/ +OPENCC_EXPORT size_t opencc_convert_utf8_to_buffer(opencc_t opencc, + const char* input, + size_t length, + char* output); /** - * Changes the mode of conversion. - * - * @param od The opencc description pointer. - * @param conversion_mode Conversion mode. Options are - * - OPENCC_CONVERSION_FAST - * - OPENCC_CONVERSION_SEGMENT_ONLY - * - OPENCC_CONVERSION_LIST_CANDIDATES - * @ingroup opencc_api - */ -void opencc_set_conversion_mode(opencc_t od, - opencc_conversion_mode conversion_mode); +* Converts UTF-8 string +* This function returns an allocated C-Style string, which stores +* the converted string. +* You MUST call opencc_convert_utf8_free() to release allocated memory. +* +* @param opencc The opencc description pointer. +* @param input The UTF-8 encoded string. +* @param length The maximum length in byte to convert. If length is (size_t)-1, +* the whole string (terminated by '\0') will be converted. +* +* @return The newly allocated UTF-8 string that stores text converted, +* or NULL on error. 
+* @ingroup opencc_c_api +*/ +OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, + const char* input, + size_t length); /** - * Returns an opencc_convert_errno_t which describes the last error. - * - * @return The error type. - */ -opencc_error opencc_errno(void); +* Releases allocated buffer by opencc_convert_utf8 +* +* @param str Pointer to the allocated string buffer by opencc_convert_utf8. +* +* @ingroup opencc_c_api +*/ +OPENCC_EXPORT void opencc_convert_utf8_free(char* str); /** - * Prints the error message to stderr. - * - * @param spec Prefix message. - * @ingroup opencc_api - */ -void opencc_perror(const char* spec); +* Returns the last error message +* +* Note that this function is the only one which is NOT thread-safe. +* +* @ingroup opencc_c_api +*/ +OPENCC_EXPORT const char* opencc_error(void); #ifdef __cplusplus } + +/** +* @defgroup opencc_simple_api OpenCC C++ Simple API +* +* Simple API in C++ language +*/ + +namespace opencc { +/** +* A high level converter +* This interface does not require C++11 to compile. +* @ingroup opencc_simple_api +*/ +class OPENCC_EXPORT SimpleConverter { +public: + /** + * Constructor of SimpleConverter + * @param configFileName File name of configuration. + */ + SimpleConverter(const std::string& configFileName); + + ~SimpleConverter(); + + /** + * Converts a text + * @param input Text to be converted. + */ + std::string Convert(const std::string& input) const; + + /** + * Converts a text + * @param input A C-Style string (terminated by '\0') to be converted. + */ + std::string Convert(const char* input) const; + + /** + * Converts a text + * @param input A C-Style string limited by a given length to be converted. + * @param length Maximal length in byte of the input string. + */ + std::string Convert(const char* input, size_t length) const; + + /** + * Converts a text and writes to an allocated buffer + * Please make sure the buffer has sufficient space.
+ * @param input A C-Style string (terminated by '\0') to be converted. + * @param output Buffer to write the converted text. + * @return Length of converted text. + */ + size_t Convert(const char* input, char* output) const; + + /** + * Converts a text and writes to an allocated buffer + * Please make sure the buffer has sufficient space. + * @param input A C-Style string limited by a given length to be converted. + * @param length Maximal length in byte of the input string. + * @param output Buffer to write the converted text. + * @return Length of converted text. + */ + size_t Convert(const char* input, size_t length, char* output) const; + +private: + const void* internalData; +}; +} #endif -#endif /* __OPENCC_H_ */ +/** +* @defgroup opencc_cpp_api OpenCC C++ Comprehensive API +* +* Comprehensive API in C++ language +*/ + +#endif diff --git a/src/opencc_types.h b/src/opencc_types.h deleted file mode 100644 index a0067de..0000000 --- a/src/opencc_types.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __OPENCC_TYPES_H_ -#define __OPENCC_TYPES_H_ - -#ifdef __cplusplus -extern "C" { -#endif // ifdef __cplusplus - -#include -#include - -typedef void* opencc_t; - -typedef uint32_t ucs4_t; - -enum _opencc_error { - OPENCC_ERROR_VOID, - OPENCC_ERROR_DICTLOAD, - OPENCC_ERROR_CONFIG, - OPENCC_ERROR_ENCODING, - OPENCC_ERROR_ENCODIND = OPENCC_ERROR_ENCODING, - OPENCC_ERROR_CONVERTER -}; -typedef enum _opencc_error opencc_error; - -enum _opencc_dictionary_type { - OPENCC_DICTIONARY_TYPE_TEXT, - OPENCC_DICTIONARY_TYPE_DATRIE -}; -typedef enum _opencc_dictionary_type opencc_dictionary_type; - -enum _opencc_conversion_mode { - OPENCC_CONVERSION_FAST = 0, - OPENCC_CONVERSION_SEGMENT_ONLY = 1, - OPENCC_CONVERSION_LIST_CANDIDATES = 2 -}; -typedef enum _opencc_conversion_mode opencc_conversion_mode; - -#ifdef __cplusplus -} -#endif // ifdef __cplusplus - -#endif /* __OPENCC_TYPES_H_ */ diff --git a/src/symbols.cmake b/src/symbols.cmake deleted file mode 100644 index 4ff8677..0000000 --- a/src/symbols.cmake +++ /dev/null @@ -1,41 +0,0 @@ -set( - OPENCC_SYMBOLS - opencc_open - opencc_close - opencc_convert - opencc_convert_utf8 - opencc_convert_utf8_free - opencc_dict_load - opencc_set_conversion_mode - opencc_errno - opencc_perror -) - -set (LINK_FLAGS "") - -if (APPLE) - - # Create a symbols_list file for the darwin linker - string(REPLACE ";" "\n_" _symbols "${OPENCC_SYMBOLS}") - set(_symbols_list "${CMAKE_CURRENT_BINARY_DIR}/symbols.list") - file(WRITE ${_symbols_list} "_${_symbols}\n") - set(LINK_FLAGS - "${LINK_FLAGS} -Wl,-exported_symbols_list,'${_symbols_list}'") - -elseif (CMAKE_C_COMPILER_ID STREQUAL GNU) - # Create a version script for GNU ld. 
- set(_symbols "{ global: ${OPENCC_SYMBOLS}; local: *; };") - set(_version_script "${CMAKE_CURRENT_BINARY_DIR}/version.script") - file(WRITE ${_version_script} "${_symbols}\n") - - set(LINK_FLAGS "${LINK_FLAGS} -Wl,--version-script,'${_version_script}'") - -endif (APPLE) - -set_target_properties( - ${LIBOPENCC_TARGET} - ${LIBOPENCC_STATIC_TARGET} - PROPERTIES - LINK_FLAGS - "${LINK_FLAGS}" -) diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt deleted file mode 100644 index 8050938..0000000 --- a/src/tools/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -set( - LIBOPENCC_DICTIONARY_SOURCES - ../dict.c - ../dictionary/datrie.c - ../dictionary/text.c - ../dict.h - ../dictionary/datrie.h - ../dictionary/text.h -) - -set( - OPENCC_DCIT_SOURCES - ${LIBOPENCC_DICTIONARY_SOURCES} - opencc_dict.c - ../dict_group.c - ../dict_group.h - ../dict_chain.c - ../dict_chain.h - ../config_reader.c - ../config_reader.h - ../encoding.c - ../encoding.h - ../utils.c - ../utils.h -) - -add_executable( - opencc_dict - ${OPENCC_DCIT_SOURCES} -) - -target_link_libraries( - opencc_dict - ${LIBOPENCC_TARGET} -) - -install( - TARGETS - opencc_dict - RUNTIME - DESTINATION - ${DIR_BIN} -) - - -set( - OPENCC_SOURCES - opencc.c - ../utils.c - ../utils.h -) - -add_executable( - opencc - ${OPENCC_SOURCES} -) - -add_dependencies( - opencc - ocds -) - -target_link_libraries( - opencc - ${LIBOPENCC_TARGET} -) - -install( - TARGETS - opencc - RUNTIME - DESTINATION - ${DIR_BIN} -) diff --git a/src/tools/opencc.c b/src/tools/opencc.c deleted file mode 100644 index b8e36ed..0000000 --- a/src/tools/opencc.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../opencc.h" -#include "../utils.h" -#include -#include -#include -#include -#include - -#ifndef VERSION -#define VERSION "" -#endif - -#define BUFFER_SIZE 65536 - -void convert(const char* input_file, - const char* output_file, - const char* config_file) { - opencc_t od = opencc_open(config_file); - if (od == (opencc_t)-1) { - opencc_perror(_("OpenCC initialization error")); - exit(1); - } - FILE* fp = stdin; - FILE* fpo = stdout; - if (input_file) { - fp = fopen(input_file, "r"); - if (!fp) { - fprintf(stderr, _("Can not read file: %s\n"), input_file); - exit(1); - } - skip_utf8_bom(fp); - } - if (output_file) { - fpo = fopen(output_file, "w"); - if (!fpo) { - fprintf(stderr, _("Can not write file: %s\n"), output_file); - exit(1); - } - } - size_t size = BUFFER_SIZE; - char* buffer_in = NULL, * buffer_out = NULL; - buffer_in = (char*)malloc(size * sizeof(char)); - char* lookahead = (char*)malloc(size * sizeof(char)); - size_t lookahead_size = 0; - while (!feof(fp)) { - size_t read; - if (lookahead_size > 0) { - memcpy(buffer_in, lookahead, lookahead_size); - read = - fread(buffer_in + lookahead_size, 1, size - lookahead_size, - fp) + lookahead_size; - lookahead_size = 0; - } else { - read = fread(buffer_in, 1, size, fp); - } - // If we haven't finished reading after filling the entire buffer, - // then it could be that we broke within an UTF-8 character, in - // that case we must backtrack and find the boundary - if (read == size) { - // Find the boundary of last UTF-8 character - int i; - for (i = read - 1; i >= 0; i--) { - 
char c = buffer_in[i]; - if (!(c & 0x80) || ((c & 0xC0) == 0xC0)) { - break; - } - } - assert(i >= 0); - memcpy(lookahead, buffer_in + i, read - i); - lookahead_size = read - i; - buffer_in[i] = '\0'; - } else { - buffer_in[read] = '\0'; - } - buffer_out = opencc_convert_utf8(od, buffer_in, (size_t)-1); - if (buffer_out != (char*)-1) { - fprintf(fpo, "%s", buffer_out); - opencc_convert_utf8_free(buffer_out); - } else { - opencc_perror(_("OpenCC error")); - break; - } - } - - if (lookahead_size > 0) { - assert(lookahead_size < size); - lookahead[lookahead_size] = '\0'; - buffer_out = opencc_convert_utf8(od, lookahead, (size_t)-1); - if (buffer_out != (char*)-1) { - fprintf(fpo, "%s", buffer_out); - opencc_convert_utf8_free(buffer_out); - } else { - opencc_perror(_("OpenCC error")); - } - } - opencc_close(od); - free(lookahead); - free(buffer_in); - fclose(fp); - fclose(fpo); -} - -void show_version() { - printf(_("\n")); - printf(_("Open Chinese Convert (OpenCC) Command Line Tool\n")); - printf(_("Version %s\n"), VERSION); - printf(_("\n")); - printf(_("Author: %s\n"), "BYVoid "); - printf(_("Bug Report: %s\n"), "http://github.com/BYVoid/OpenCC/issues"); - printf(_("\n")); -} - -void show_usage() { - show_version(); - printf(_("Usage:\n")); - printf(_(" opencc [Options]\n")); - printf(_("\n")); - printf(_("Options:\n")); - printf(_(" -i [file], --input=[file] Read original text from [file].\n")); - printf(_(" -o [file], --output=[file] Write converted text to [file].\n")); - printf(_( - " -c [file], --config=[file] Load configuration of conversion from [file].\n")); - printf(_(" -v, --version Print version and build information.\n")); - printf(_(" -h, --help Print this help.\n")); - printf(_("\n")); - printf(_( - "With no input file, reads standard input and writes converted stream to standard output.\n")); - printf(_( - "Default configuration(%s) will be loaded if not set.\n"), - OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); - printf(_("\n")); -} - -int main(int argc, 
char** argv) { -#ifdef ENABLE_GETTEXT - setlocale(LC_ALL, ""); - bindtextdomain(PACKAGE_NAME, LOCALEDIR); -#endif /* ifdef ENABLE_GETTEXT */ - static struct option longopts[] = - { - { "version", no_argument, NULL, 'v' }, - { "help", no_argument, NULL, 'h' }, - { "input", required_argument, NULL, 'i' }, - { "output", required_argument, NULL, 'o' }, - { "config", required_argument, NULL, 'c' }, - { 0, 0, 0, 0 }, - }; - static int oc; - static char* input_file, * output_file, * config_file; - while ((oc = getopt_long(argc, argv, "vh?i:o:c:", longopts, NULL)) != -1) { - switch (oc) { - case 'v': - show_version(); - return 0; - case 'h': - case '?': - show_usage(); - return 0; - case 'i': - input_file = mstrcpy(optarg); - break; - case 'o': - output_file = mstrcpy(optarg); - break; - case 'c': - config_file = mstrcpy(optarg); - break; - } - } - if (config_file == NULL) { - config_file = mstrcpy(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); - } - convert(input_file, output_file, config_file); - free(input_file); - free(output_file); - free(config_file); - return 0; -} diff --git a/src/tools/opencc_dict.c b/src/tools/opencc_dict.c deleted file mode 100644 index c315996..0000000 --- a/src/tools/opencc_dict.c +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../dictionary/datrie.h" -#include "../dictionary/text.h" -#include "../dict_group.h" -#include "../encoding.h" -#include "../utils.h" -#include -#include - -#ifndef VERSION -#define VERSION "" -#endif - -#define DATRIE_SIZE 1000000 -#define DATRIE_WORD_MAX_COUNT 500000 -#define DATRIE_WORD_MAX_LENGTH 32 -#define BUFFER_SIZE 1024 - -typedef struct { - uint32_t cursor; - ucs4_t* pointer; -} Value; - -typedef struct { - ucs4_t* key; - Value* value; - size_t length; - size_t value_count; -} Entry; - -Entry lexicon[DATRIE_WORD_MAX_COUNT]; -uint32_t lexicon_count, words_set_count; -int words_set[DATRIE_WORD_MAX_COUNT]; -ucs4_t words_set_char[DATRIE_WORD_MAX_COUNT]; -DatrieItem dat[DATRIE_SIZE]; -uint32_t lexicon_index_length, lexicon_cursor_end; - -void match_word(const DatrieItem* dat, - const ucs4_t* word, - int* match_pos, - int* id, - int limit) { - int i, j, p; - for (i = 0, p = 0; - word[p] && (limit == 0 || p < limit) && dat[i].base != DATRIE_UNUSED; - p++) { - int k = encode_char(word[p]); - j = dat[i].base + k; - if ((j < 0) || (j > DATRIE_SIZE) || (dat[j].parent != i)) { - break; - } - i = j; - } - if (match_pos) { - *match_pos = p; - } - if (id) { - *id = i; - } -} - -int unused(int i) { - if ((i >= 0) && (i < DATRIE_SIZE)) { - return dat[i].parent == DATRIE_UNUSED; - } - return 0; -} - -int is_prefix(const ucs4_t* a, const ucs4_t* b) { - const ucs4_t* p = a, * q = b; - while (*p != 0) { - if (*q == 0) { - return 0; - } - if (*p != *q) { - return 0; - } - p++; - q++; - } - return 1; -} - -int binary_search(const ucs4_t* str) { - int a = 0, b = lexicon_count - 1, c; - while (a + 1 < b) { - c = (a + b) / 2; - - if (ucs4cmp(str, lexicon[c].key) <= 0) { - b = c; - } else { - a = c + 1; - } - } - if (is_prefix(str, - lexicon[a].key) && - ((a == 0) || !is_prefix(str, lexicon[a - 1].key))) { - return a; - } - if (is_prefix(str, lexicon[b].key) && !is_prefix(str, lexicon[b - 1].key)) { - return b; - } - return -1; -} - -int wcmp(const void* a, const 
void* b) { - return *(const ucs4_t*)a < *(const ucs4_t*)b ? -1 : 1; -} - -void get_words_with_prefix(ucs4_t* word, int p) { - int i; - static ucs4_t buff[DATRIE_WORD_MAX_LENGTH]; - static ucs4_t words_set_char_buff[DATRIE_WORD_MAX_COUNT]; - - for (i = 0; i < p; i++) { - buff[i] = word[i]; - } - buff[p] = 0; - words_set_count = 0; - for (i = binary_search(buff); - (uint32_t)i < lexicon_count && is_prefix(buff, lexicon[i].key); i++) { - if (ucs4cmp(buff, lexicon[i].key) == 0) { - continue; - } - words_set_char_buff[words_set_count] = lexicon[i].key[p]; - words_set[words_set_count++] = i; - } - words_set_char_buff[words_set_count] = 0; - qsort(words_set_char_buff, words_set_count, sizeof(words_set_char_buff[0]), - wcmp); - ucs4_t* wfp, * wp, last; - for (last = 0, wfp = words_set_char_buff, wp = words_set_char; *wfp; wfp++) { - if (*wfp != last) { - last = *wfp; - *wp = *wfp; - wp++; - } - } - *wp = 0; -} - -int words_space_available(int delta) { - ucs4_t* wp; - for (wp = words_set_char; *wp; wp++) { - if (!unused(encode_char(*wp) + delta)) { - return 0; - } - } - return 1; -} - -void insert_first_char(int id) { - Entry* word = lexicon + id; - int key = encode_char(word->key[0]); - dat[key].base = DATRIE_UNUSED; - dat[key].parent = 0; - if (word->length == 1) { - dat[key].word = (id); - } -} - -void insert_words(int delta, int parent, size_t word_len) { - int i; - for (i = 0; (uint32_t)i < words_set_count; i++) { - int j = words_set[i]; - int k = encode_char(lexicon[j].key[word_len]) + delta; - dat[k].parent = parent; - if (lexicon[j].length == word_len + 1) { - dat[k].word = (j); - } - } -} - -void insert(int id) { - static int space_min = 0; - Entry* word = &lexicon[id]; - for (;;) { - int p, i; - match_word(dat, word->key, &p, &i, 0); - if ((size_t)p == word->length) { - return; - } - get_words_with_prefix(word->key, p); - int delta; - delta = space_min - words_set_char[0]; - for (; delta < DATRIE_SIZE; delta++) { - if (words_space_available(delta)) { - break; - } 
- } - if (delta == DATRIE_SIZE) { - fprintf(stderr, "DATRIE_SIZE Not Enough!\n"); - exit(1); - } - insert_words(delta, i, p); - dat[i].base = delta; - while (!unused(space_min)) { - space_min++; - } - } -} - -void make(void) { - size_t i; - for (i = 1; i < DATRIE_SIZE; i++) { - dat[i].parent = dat[i].base = DATRIE_UNUSED; - dat[i].word = -1; - } - dat[0].parent = dat[0].base = 0; - for (i = 0; i < lexicon_count; i++) { - insert_first_char(i); - } - for (i = 0; i < lexicon_count; i++) { - insert(i); - } -} - -int cmp(const void* a, const void* b) { - return ucs4cmp(((const TextEntry*)a)->key, ((const TextEntry*)b)->key); -} - -void init(const char* filename) { - DictGroup* DictGroup = dict_group_new(NULL); - if (dict_group_load(DictGroup, filename, - OPENCC_DICTIONARY_TYPE_TEXT) == -1) { - dictionary_perror("Dictionary loading error"); - fprintf(stderr, _("\n")); - exit(1); - } - Dict* dict_abs = dict_group_get_dict(DictGroup, 0); - if (dict_abs == (Dict*)-1) { - dictionary_perror("Dictionary loading error"); - fprintf(stderr, _("\n")); - exit(1); - } - static TextEntry tlexicon[DATRIE_WORD_MAX_COUNT]; - /* TODO add datrie support */ - Dict* dictionary = dict_abs->dict; - lexicon_count = dict_text_get_lexicon(dictionary, tlexicon); - qsort(tlexicon, lexicon_count, sizeof(tlexicon[0]), cmp); - size_t i; - size_t lexicon_cursor = 0; - for (i = 0; i < lexicon_count; i++) { - lexicon[i].key = tlexicon[i].key; - lexicon[i].length = ucs4len(lexicon[i].key); - size_t j; - for (j = 0; tlexicon[i].value[j] != NULL; j++) {} - lexicon[i].value_count = j; - lexicon_index_length += lexicon[i].value_count + 1; - lexicon[i].value = (Value*)malloc(lexicon[i].value_count * sizeof(Value)); - for (j = 0; j < lexicon[i].value_count; j++) { - lexicon[i].value[j].cursor = lexicon_cursor; - lexicon[i].value[j].pointer = tlexicon[i].value[j]; - lexicon_cursor += ucs4len(tlexicon[i].value[j]) + 1; - } - } - lexicon_cursor_end = lexicon_cursor; -} - -void output(const char* file_name) { - 
FILE* fp = fopen(file_name, "wb"); - if (!fp) { - fprintf(stderr, _("Can not write file: %s\n"), file_name); - exit(1); - } - uint32_t i, item_count; - for (i = DATRIE_SIZE - 1; i > 0; i--) { - if (dat[i].parent != DATRIE_UNUSED) { - break; - } - } - item_count = i + 1; - fwrite("OPENCCDATRIE", sizeof(char), strlen("OPENCCDATRIE"), fp); - /* 詞彙表長度 */ - fwrite(&lexicon_cursor_end, sizeof(uint32_t), 1, fp); - for (i = 0; i < lexicon_count; i++) { - size_t j; - for (j = 0; j < lexicon[i].value_count; j++) { - fwrite(lexicon[i].value[j].pointer, sizeof(ucs4_t), - ucs4len(lexicon[i].value[j].pointer) + 1, fp); - } - } - /* 詞彙索引表長度 */ - fwrite(&lexicon_index_length, sizeof(uint32_t), 1, fp); - for (i = 0; i < lexicon_count; i++) { - size_t j; - for (j = 0; j < lexicon[i].value_count; j++) { - fwrite(&lexicon[i].value[j].cursor, sizeof(uint32_t), 1, fp); - } - uint32_t dem = (uint32_t)-1; - fwrite(&dem, sizeof(uint32_t), 1, fp); /* 分隔符 */ - } - fwrite(&lexicon_count, sizeof(uint32_t), 1, fp); - fwrite(&item_count, sizeof(uint32_t), 1, fp); - fwrite(dat, sizeof(dat[0]), item_count, fp); - fclose(fp); -} - -#ifdef DEBUG_WRITE_TEXT -void write_text_file() { - FILE* fp; - int i; - fp = fopen("datrie.txt", "w"); - fprintf(fp, "%d\n", lexicon_count); - for (i = 0; i < lexicon_count; i++) { - char* buff = ucs4_to_utf8(lexicon[i].value, (size_t)-1); - fprintf(fp, "%s\n", buff); - free(buff); - } - for (i = 0; i < DATRIE_SIZE; i++) { - if (dat[i].parent != DATRIE_UNUSED) { - fprintf(fp, "%d %d %d %d\n", i, dat[i].base, dat[i].parent, dat[i].word); - } - } - fclose(fp); -} - -#endif /* ifdef DEBUG_WRITE_TEXT */ - -void show_version() { - printf(_("\nOpen Chinese Convert (OpenCC) Dictionary Tool\nVersion %s\n\n"), - VERSION); -} - -void show_usage() { - show_version(); - printf(_("Usage:\n")); - printf(_(" opencc_dict -i input_file -o output_file\n\n")); - printf(_(" -i input_file\n")); - printf(_(" Read data from input_file.\n")); - printf(_(" -o output_file\n")); - printf(_(" 
Write converted data to output_file.\n")); - printf(_("\n")); - printf(_("\n")); -} - -int main(int argc, char** argv) { - static int oc; - static char input_file[BUFFER_SIZE], output_file[BUFFER_SIZE]; - int input_file_specified = 0, output_file_specified = 0; - -#ifdef ENABLE_GETTEXT - setlocale(LC_ALL, ""); - bindtextdomain(PACKAGE_NAME, LOCALEDIR); -#endif /* ifdef ENABLE_GETTEXT */ - while ((oc = getopt(argc, argv, "vh-:i:o:")) != -1) { - switch (oc) { - case 'v': - show_version(); - return 0; - case 'h': - case '?': - show_usage(); - return 0; - case '-': - if (strcmp(optarg, "version") == 0) { - show_version(); - } else if (strcmp(optarg, "help") == 0) { - show_usage(); - } else { - show_usage(); - } - return 0; - case 'i': - strcpy(input_file, optarg); - input_file_specified = 1; - break; - case 'o': - strcpy(output_file, optarg); - output_file_specified = 1; - break; - } - } - if (!input_file_specified) { - fprintf(stderr, _("Please specify input file using -i.\n")); - show_usage(); - return 1; - } - if (!output_file_specified) { - fprintf(stderr, _("Please specify output file using -o.\n")); - show_usage(); - return 1; - } - init(input_file); - make(); - output(output_file); -#ifdef DEBUG_WRITE_TEXT - write_text_file(); -#endif /* ifdef DEBUG_WRITE_TEXT */ - return 0; -} diff --git a/src/utils.c b/src/utils.c deleted file mode 100644 index 3519808..0000000 --- a/src/utils.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "utils.h" -#include - -#ifdef __APPLE__ - #include "TargetConditionals.h" - #ifdef TARGET_OS_MAC - #include - #elif TARGET_OS_IPHONE - #elif TARGET_IPHONE_SIMULATOR - #else /* ifdef TARGET_OS_MAC */ - #endif /* ifdef TARGET_OS_MAC */ -#elif defined _WIN32 || defined _WIN64 - #include "Windows.h" -#endif /* ifdef __APPLE__ */ - -#if defined _WIN32 || defined _WIN64 - #define PATH_SEPARATOR '\\' -#else - #define PATH_SEPARATOR '/' -#endif - -#define PATH_BUFFER_SIZE 4096 - -void perr(const char* str) { - fputs(str, stderr); -} - -int qsort_int_cmp(const void* a, const void* b) { - return *((int*)a) - *((int*)b); -} - -char* mstrcpy(const char* str) { - char* strbuf = (char*)malloc(sizeof(char) * (strlen(str) + 1)); - - strcpy(strbuf, str); - return strbuf; -} - -char* mstrncpy(const char* str, size_t n) { - char* strbuf = (char*)malloc(sizeof(char) * (n + 1)); - - strncpy(strbuf, str, n); - strbuf[n] = '\0'; - return strbuf; -} - -void skip_utf8_bom(FILE* fp) { - int bom[3]; - int n; - - /* UTF-8 BOM is EF BB BF */ - if (fp == NULL) { - return; - } - - /* If we are not at beginning of file, return */ - if (ftell(fp) != 0) { - return; - } - - /* Try to read first 3 bytes */ - for (n = 0; n <= 2 && (bom[n] = getc(fp)) != EOF; n++) {} - - /* If we can only read <3 bytes, push them back */ - /* Or if first 3 bytes is not BOM, push them back */ - if ((n < 3) || (bom[0] != 0xEF) || (bom[1] != 0xBB) || (bom[2] != 0xBF)) { - for (n--; n >= 0; n--) { - ungetc(bom[n], fp); - } - } - - /* Otherwise, BOM is already skipped */ -} - -const char* executable_path(void) { - static char path_buffer[PATH_BUFFER_SIZE]; - static int calculated = 0; - - if (!calculated) { -#ifdef __linux - ssize_t res = readlink("/proc/self/exe", path_buffer, sizeof(path_buffer)); - assert(res != -1); -#elif __APPLE__ - uint32_t size = sizeof(path_buffer); - int res = 
_NSGetExecutablePath(path_buffer, &size); - assert(res == 0); -#elif _WIN32 || _WIN64 - // NOTE: for "C:\\opencc.exe" on Windows, the returned path "C:" is - // incorrect until a '/' is appended to it later in try_open_file() - DWORD res = GetModuleFileNameA(NULL, path_buffer, PATH_BUFFER_SIZE); - assert(res != 0); -#else - /* Other unsupported os */ - assert(0); -#endif /* ifdef __linux */ - char* last_sep = strrchr(path_buffer, PATH_SEPARATOR); - assert(last_sep != NULL); - *last_sep = '\0'; - calculated = 1; - } - return path_buffer; -} - -char* try_open_file(const char* path) { - /* Try to find file in current working directory */ - FILE* fp = fopen(path, "r"); - - if (fp) { - fclose(fp); - return mstrcpy(path); - } - - /* If path is absolute, return NULL */ - if (is_absolute_path(path)) { - return NULL; - } - - /* Try to find file in executable directory */ - const char* exe_dir = executable_path(); - char* filename = - (char*)malloc(sizeof(char) * (strlen(path) + strlen(exe_dir) + 2)); - sprintf(filename, "%s/%s", exe_dir, path); - fp = fopen(filename, "r"); - - if (fp) { - fclose(fp); - return filename; - } - free(filename); - - /* Try to use PKGDATADIR */ - filename = - (char*)malloc(sizeof(char) * (strlen(path) + strlen(PKGDATADIR) + 2)); - sprintf(filename, "%s/%s", PKGDATADIR, path); - fp = fopen(filename, "r"); - - if (fp) { - fclose(fp); - return filename; - } - free(filename); - return NULL; -} - -char* get_file_path(const char* filename) { - const char* last_sep = strrchr(filename, '/'); - - if (last_sep == NULL) { - last_sep = filename; - } - char* path = mstrncpy(filename, last_sep - filename); - return path; -} - -int is_absolute_path(const char* path) { - if (path[0] == '/') { - return 1; - } - - if (path[1] == ':') { - return 1; - } - return 0; -} diff --git a/src/utils.h b/src/utils.h deleted file mode 100644 index c2c994c..0000000 --- a/src/utils.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - 
* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCC_UTILS_H_ -#define __OPENCC_UTILS_H_ - -#include "common.h" - -#define debug_should_not_be_here() \ - do { \ - fprintf(stderr, "Should not be here %s: %d\n", __FILE__, __LINE__); \ - assert(0); \ - } while (0) \ - -void perr(const char* str); - -int qsort_int_cmp(const void* a, const void* b); - -char* mstrcpy(const char* str); - -char* mstrncpy(const char* str, size_t n); - -void skip_utf8_bom(FILE* fp); - -const char* executable_path(void); - -char* try_open_file(const char* path); - -char* get_file_path(const char* filename); - -int is_absolute_path(const char* path); - -#endif /* __OPENCC_UTILS_H_ */ diff --git a/src/wrapper/cplusplus/openccxx.h b/src/wrapper/cplusplus/openccxx.h deleted file mode 100644 index a937555..0000000 --- a/src/wrapper/cplusplus/openccxx.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Open Chinese Convert - * - * Copyright 2010-2013 BYVoid - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OPENCCXX_H_ -#define __OPENCCXX_H_ - -/** - * c++ wrapper for opencc - */ -#ifdef __cplusplus - -extern "C" { -# include -} - -# include -# include - -namespace opencc { - -class opencc { -public: - opencc(const char* config_file = NULL) - : od((opencc_t)-1) { - open(config_file); - } - - virtual ~opencc() { - if (od != (opencc_t)-1) { - opencc_close(od); - } - } - - operator bool() const { - return od != (opencc_t)-1; - } - - int open(const char* config_file) { - if (od != (opencc_t)-1) { - opencc_close(od); - } - od = opencc_open(config_file); - return (od == (opencc_t)-1) ? (-1) : (0); - } - - int set_conversion_mode(opencc_conversion_mode conversion_mode) { - if (od == (opencc_t)-1) { - return -1; - } - opencc_set_conversion_mode(od, conversion_mode); - return 0; - } - - long convert(const std::string& in, std::string& out, long length = -1) { - if (od == (opencc_t)-1) { - return -1; - } - if (length == -1) { - length = in.length(); - } - char* outbuf = opencc_convert_utf8(od, in.c_str(), length); - if (outbuf == (char*)-1) { - return -1; - } - out = outbuf; - free(outbuf); - return length; - } - - /** - * Warning: - * This method can be used only if wchar_t is encoded in UCS4 on your - *platform. 
- */ - long convert(const std::wstring& in, std::wstring& out, long length = -1) { - if (od == (opencc_t)-1) { - return -1; - } - size_t inbuf_left = in.length(); - if ((length >= 0) && (length < (long)inbuf_left)) { - inbuf_left = length; - } - const ucs4_t* inbuf = (const ucs4_t*)in.c_str(); - long count = 0; - while (inbuf_left != 0) { - size_t retval; - size_t outbuf_left; - ucs4_t* outbuf; - /* occupy space */ - outbuf_left = inbuf_left + 64; - out.resize(count + outbuf_left); - outbuf = (ucs4_t*)out.c_str() + count; - retval = opencc_convert(od, (ucs4_t**)&inbuf, - &inbuf_left, &outbuf, &outbuf_left); - if (retval == (size_t)-1) { - return -1; - } - count += retval; - } - /* set the zero termination and shrink the size */ - out.resize(count + 1); - out[count] = L'\0'; - return count; - } - - opencc_error errno() const { - return opencc_errno(); - } - - void perror(const char* spec = "OpenCC") const { - opencc_perror(spec); - } - -private: - opencc_t od; -}; -} - -#endif // ifdef __cplusplus - -#endif /* __OPENCCXX_H_ */ diff --git a/src/wrapper/python/opencc.py b/src/wrapper/python/opencc.py deleted file mode 100755 index 45cbe9f..0000000 --- a/src/wrapper/python/opencc.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from ctypes import cast, cdll, c_char_p, c_int, c_size_t, c_void_p -from ctypes.util import find_library -import sys - -class ConvertError(Exception): - pass - -class DictType: - TEXT,DATRIE = 0,1 - -## @defgroup python_api Python API -# API in python language - -## OpenCC Python language binding -# @ingroup python_api -class OpenCC: - - ## Constructor - # @param self The object pointer. - # @param config Filename of config. - # @param verbose Specifies whether error information is printed. 
- # @ingroup python_api - def __init__(self, config=None, verbose=True): - self.libopencc = cdll.LoadLibrary(find_library('opencc')) - self.libopencc.opencc_open.restype = c_void_p - self.libopencc.opencc_convert_utf8.argtypes = [c_void_p, c_char_p, c_size_t] - # for checking for the returned '-1' pointer in case opencc_convert() fails. - # c_char_p always tries to convert the returned (char *) to a Python string, - self.libopencc.opencc_convert_utf8.restype = c_void_p - self.libopencc.opencc_close.argtypes = [c_void_p] - self.libopencc.opencc_perror.argtypes = [c_char_p] - self.libopencc.opencc_dict_load.argtypes = [c_void_p, c_char_p, c_int] - - self.libc = cdll.LoadLibrary(find_library('c')) - self.libc.free.argtypes = [c_void_p] - - self.config = config - self.verbose = verbose - self.od = None - - ## @deprecated - def __enter__(self): - if self.config is None: - self.od = self.libopencc.opencc_open(0) - else: - self.od = self.libopencc.opencc_open(c_char_p(self.config)) - return self - - ## @deprecated - def __exit__(self, type, value, traceback): - self.libopencc.opencc_close(self.od) - self.od = None - - def __perror(self, message): - if self.verbose: - self.libopencc.opencc_perror(message) - - ## Converts text. - # @param self The object pointer. - # @param text Input text. - # @return Converted text. 
- # @ingroup python_api - def convert(self, text): - retv_c = self.libopencc.opencc_convert_utf8(self.od, text, len(text)) - if retv_c == -1: - self.__perror('OpenCC error:') - raise ConvertError() - retv_c = cast(retv_c, c_char_p) - str_buffer = retv_c.value - self.libc.free(retv_c); - return str_buffer - - ## @deprecated - def dict_load(self, filename, dicttype): - retv = self.libopencc.opencc_dict_load(self.od, filename, dicttype) - if retv == -1: - self.__perror('OpenCC error:') - return retv - -if __name__ == "__main__": - with sys.stdin as fp: - text = fp.read() - with OpenCC() as converter: - for path in ['simp_to_trad_characters.ocd', - 'simp_to_trad_phrases.ocd']: - converter.dict_load(path, DictType.DATRIE) - print converter.convert(text) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2a9b953..4a6b40f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,35 +1,58 @@ -set(CONFIGURATIONS - zhs2zht - zht2zhs - mix2zht - mix2zhs - zhs2zhtw_p - zhs2zhtw_vp - zhtw2zhcn_t - zhtw2zhcn_s +include_directories(../deps/libdarts/src) +include_directories(../src) + +add_executable( + UnitTest + UnitTest.cpp +) +target_link_libraries( + UnitTest + libopencc +) +add_test( + UnitTest + UnitTest +) +set(CONFIG_TEST + config_test/config_test.json + config_test/config_test_characters.txt + config_test/config_test_phrases.txt ) -foreach(CONFIG ${CONFIGURATIONS}) +set(CONFIG_TEST_TARGET_DIR ${PROJECT_BINARY_DIR}/test/config_test) +make_directory(${CONFIG_TEST_TARGET_DIR}) +foreach (CONFIG_TEST_FILE ${CONFIG_TEST}) + configure_file(${CONFIG_TEST_FILE} ${CONFIG_TEST_TARGET_DIR} COPYONLY) +endforeach (CONFIG_TEST_FILE) - add_test( - ${CONFIG}_convert - ${CMAKE_COMMAND} -E chdir ${PROJECT_BINARY_DIR}/data - ${PROJECT_BINARY_DIR}/src/tools/opencc - -i ${CMAKE_SOURCE_DIR}/test/testcases/${CONFIG}.in - -o ${PROJECT_BINARY_DIR}/test/${CONFIG}.out - -c ${CMAKE_SOURCE_DIR}/data/config/${CONFIG}.ini - ) - - add_test( - ${CONFIG}_compare - diff - 
${PROJECT_BINARY_DIR}/test/${CONFIG}.out - ${CMAKE_SOURCE_DIR}/test/testcases/${CONFIG}.ans - ) - - set_property( - TEST ${CONFIG}_compare - APPEND PROPERTY - DEPENDS ${CONFIG}_convert) +set(CONFIGURATIONS + s2t + t2s + s2tw + s2twp + tw2s + tw2sp + s2hk + hk2s +) -endforeach(CONFIG) \ No newline at end of file +foreach(CONFIG ${CONFIGURATIONS}) + add_test( + ${CONFIG}_convert + ${CMAKE_COMMAND} -E chdir ${PROJECT_BINARY_DIR}/data + ${PROJECT_BINARY_DIR}/src/opencc + -i ${CMAKE_SOURCE_DIR}/test/testcases/${CONFIG}.in + -o ${PROJECT_BINARY_DIR}/test/${CONFIG}.out + -c ${CMAKE_SOURCE_DIR}/data/config/${CONFIG}.json + ) + add_test( + ${CONFIG}_compare + diff + ${PROJECT_BINARY_DIR}/test/${CONFIG}.out + ${CMAKE_SOURCE_DIR}/test/testcases/${CONFIG}.ans + ) + set_property( + TEST ${CONFIG}_compare + APPEND PROPERTY + DEPENDS ${CONFIG}_convert) +endforeach(CONFIG) diff --git a/test/DictTestUtils.hpp b/test/DictTestUtils.hpp new file mode 100644 index 0000000..f4bc1d4 --- /dev/null +++ b/test/DictTestUtils.hpp @@ -0,0 +1,150 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "Conversion.hpp" +#include "DartsDict.hpp" +#include "Dict.hpp" +#include "DictGroup.hpp" +#include "Lexicon.hpp" +#include "TestUtils.hpp" +#include "TextDict.hpp" +#include "Segments.hpp" + +#if defined(_MSC_VER) && _MSC_VER > 1310 +// Visual C++ 2005 and later require the source files in UTF-8, and all strings +// to be encoded as wchar_t otherwise the strings will be converted into the +// local multibyte encoding and cause errors. To use a wchar_t as UTF-8, these +// strings then need to be convert back to UTF-8. This function is just a rough +// example of how to do this. +# include +# define utf8(str) ConvertToUTF8(L ## str) +string ConvertToUTF8(const wchar_t* pStr) { + static char szBuf[1024]; + WideCharToMultiByte(CP_UTF8, 0, pStr, -1, szBuf, sizeof(szBuf), NULL, NULL); + return szBuf; +} + +#else // if defined(_MSC_VER) && _MSC_VER > 1310 +// Visual C++ 2003 and gcc will use the string literals as is, so the files +// should be saved as UTF-8. gcc requires the files to not have a UTF-8 BOM. 
+# define utf8(str) string(str) +#endif // if defined(_MSC_VER) && _MSC_VER > 1310 + +namespace opencc { +class DictTestUtils { +public: + static TextDictPtr CreateTextDictForText() { + LexiconPtr lexicon(new Lexicon); + lexicon->Add(DictEntryFactory::New("BYVoid", "byv")); + lexicon->Add(DictEntryFactory::New("zigzagzig", "zag")); + lexicon->Add(DictEntryFactory::New(utf8("積羽沉舟"), utf8("羣輕折軸"))); + lexicon->Add(DictEntryFactory::New(utf8("清"), "Tsing")); + lexicon->Add(DictEntryFactory::New(utf8("清華"), "Tsinghua")); + lexicon->Add(DictEntryFactory::New(utf8("清華大學"), "TsinghuaUniversity")); + lexicon->Sort(); + return TextDictPtr(new TextDict(lexicon)); + } + + static DictPtr CreateDictForCharacters() { + LexiconPtr lexicon(new Lexicon); + lexicon->Add(DictEntryFactory::New(utf8("后"), + vector{utf8("后"), utf8("後")})); + lexicon->Add(DictEntryFactory::New(utf8("发"), + vector{utf8("發"), utf8("髮")})); + lexicon->Add(DictEntryFactory::New(utf8("干"), + vector{utf8("幹"), utf8("乾"), + utf8("干")})); + lexicon->Add(DictEntryFactory::New(utf8("里"), + vector{utf8("裏"), utf8("里")})); + lexicon->Sort(); + return TextDictPtr(new TextDict(lexicon)); + } + + static DictPtr CreateDictForPhrases() { + LexiconPtr lexicon(new Lexicon); + lexicon->Add(DictEntryFactory::New(utf8("太后"), utf8("太后"))); + lexicon->Add(DictEntryFactory::New(utf8("头发"), utf8("頭髮"))); + lexicon->Add(DictEntryFactory::New(utf8("干燥"), utf8("乾燥"))); + lexicon->Add(DictEntryFactory::New(utf8("鼠标"), utf8("鼠標"))); + lexicon->Sort(); + TextDictPtr textDict(new TextDict(lexicon)); + + DartsDictPtr dartsDict = DartsDict::NewFromDict(*textDict.get()); + return dartsDict; + } + + static DictGroupPtr CreateDictGroupForConversion() { + DictPtr phrasesDict = CreateDictForPhrases(); + DictPtr charactersDict = CreateDictForCharacters(); + DictGroupPtr dictGroup( + new DictGroup(list{phrasesDict, charactersDict})); + return dictGroup; + } + + static DictPtr CreateDictForTaiwanVariants() { + LexiconPtr lexicon(new Lexicon); + 
lexicon->Add(DictEntryFactory::New(utf8("裏"), utf8("裡"))); + TextDictPtr textDict(new TextDict(lexicon)); + return textDict; + } + + static DictPtr CreateTaiwanPhraseDict() { + LexiconPtr lexicon(new Lexicon); + lexicon->Add(DictEntryFactory::New(utf8("鼠标"), utf8("滑鼠"))); + lexicon->Add(DictEntryFactory::New(utf8("服务器"), utf8("伺服器"))); + lexicon->Add(DictEntryFactory::New(utf8("克罗地亚"), utf8("克羅埃西亞"))); + lexicon->Sort(); + TextDictPtr textDict(new TextDict(lexicon)); + + DartsDictPtr dartsDict = DartsDict::NewFromDict(*textDict.get()); + return dartsDict; + } + + static void TestDict(DictPtr dict) { + Optional entry = dict->MatchPrefix("BYVoid"); + AssertTrue(!entry.IsNull()); + AssertEquals(utf8("BYVoid"), entry.Get()->Key()); + AssertEquals(utf8("byv"), entry.Get()->GetDefault()); + + entry = dict->MatchPrefix("BYVoid123"); + AssertTrue(!entry.IsNull()); + AssertEquals(utf8("BYVoid"), entry.Get()->Key()); + AssertEquals(utf8("byv"), entry.Get()->GetDefault()); + + entry = dict->MatchPrefix(utf8("積羽沉舟")); + AssertTrue(!entry.IsNull()); + AssertEquals(utf8("積羽沉舟"), entry.Get()->Key()); + AssertEquals(utf8("羣輕折軸"), entry.Get()->GetDefault()); + + entry = dict->MatchPrefix("Unknown"); + AssertTrue(entry.IsNull()); + + const vector matches = + dict->MatchAllPrefixes(utf8("清華大學計算機系")); + AssertEquals(3, matches.size()); + AssertEquals(utf8("清華大學"), matches.at(0)->Key()); + AssertEquals(utf8("TsinghuaUniversity"), matches.at(0)->GetDefault()); + AssertEquals(utf8("清華"), matches.at(1)->Key()); + AssertEquals(utf8("Tsinghua"), matches.at(1)->GetDefault()); + AssertEquals(utf8("清"), matches.at(2)->Key()); + AssertEquals(utf8("Tsing"), matches.at(2)->GetDefault()); + } +}; +} diff --git a/test/TestUtils.hpp b/test/TestUtils.hpp new file mode 100644 index 0000000..9c622d4 --- /dev/null +++ b/test/TestUtils.hpp @@ -0,0 +1,80 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Common.hpp" +#include "Segments.hpp" + +namespace opencc { + +#ifdef _MSC_VER +# define __func__ __FUNCTION__ +#endif // ifdef _MSC_VER + +#define stringize(s) # s +#define Assert(condition, msg) { \ + if (!(condition)) { \ + std::ostringstream __buffer; \ + __buffer << "Assertion failed: " << stringize(condition) << ", function " \ + << __func__ << ", " << __FILE__ << ":" << __LINE__ << "\n" << msg; \ + throw AssertionFailure(__buffer.str()); \ + } \ +} +#define AssertTrue(condition) Assert(condition, "") +#define AssertEquals(expected, actual) { \ + if (!((expected) == (actual))) { \ + std::ostringstream __buffer0; \ + __buffer0 << "Expected: " << (expected) << "\n"; \ + __buffer0 << "Actual: " << (actual) << "\n"; \ + Assert((expected) == (actual), __buffer0.str()); \ + } \ +} + +class AssertionFailure : public std::runtime_error { +public: + AssertionFailure(string msg) : std::runtime_error(msg) { + } +}; + +static inline void SegmentsAssertEquals(const SegmentsPtr& expected, + const SegmentsPtr& actual) { + size_t length = expected->Length(); + AssertTrue(length == actual->Length()); + for (size_t i = 0; i < length; i++) { + AssertEquals(string(expected->At(i)), string(actual->At(i))); + } +} + +class TestUtils { +public: + static void RunTest(const string name, void (* func)(void)) { + clock_t start = clock(); + std::cout << "[" << name << "]" << "..."; + try { + func(); + clock_t end = clock(); + double duration = (end - start) * 1000.0 / 
CLOCKS_PER_SEC; + std::cout << "Success" << " (" << duration << "ms)" << std::endl; + } catch (AssertionFailure e) { + std::cout << "Failed" << std::endl; + std::cout << e.what() << std::endl; + } + } +}; +} diff --git a/test/UnitTest.cpp b/test/UnitTest.cpp new file mode 100644 index 0000000..d03ff8f --- /dev/null +++ b/test/UnitTest.cpp @@ -0,0 +1,233 @@ +/* + * Open Chinese Convert + * + * Copyright 2010-2014 BYVoid + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "BinaryDict.hpp" +#include "Config.hpp" +#include "ConversionChain.hpp" +#include "Converter.hpp" +#include "DictTestUtils.hpp" +#include "MaxMatchSegmentation.hpp" +#include "opencc.h" + +using namespace opencc; + +void TestTextDict() { + TextDictPtr textDict = DictTestUtils::CreateTextDictForText(); + DictTestUtils::TestDict(textDict); + + // Serialization + string fileName = "dict.txt"; + textDict->opencc::SerializableDict::SerializeToFile(fileName); + + // Deserialization + TextDictPtr deserialized = SerializableDict::NewFromFile(fileName); + DictTestUtils::TestDict(deserialized); +} + +void TestBinaryDict() { + TextDictPtr textDict = DictTestUtils::CreateTextDictForText(); + BinaryDictPtr binDict(new BinaryDict(textDict->GetLexicon())); + + // Serialization + string fileName = "dict.bin"; + binDict->opencc::SerializableDict::SerializeToFile(fileName); + + // Deserialization + BinaryDictPtr deserialized = SerializableDict::NewFromFile(fileName); + const 
LexiconPtr& lex1 = binDict->GetLexicon(); + const LexiconPtr& lex2 = deserialized->GetLexicon(); + + AssertEquals(lex1->Length(), lex2->Length()); + for (size_t i = 0; i < lex1->Length(); i++) { + AssertEquals(string(lex1->At(i)->Key()), lex2->At(i)->Key()); + AssertEquals(lex1->At(i)->NumValues(), lex2->At(i)->NumValues()); + } + + TextDictPtr deserializedTextDict(new TextDict(lex2)); + DictTestUtils::TestDict(deserializedTextDict); +} + +void TestDartsDict() { + TextDictPtr textDict = DictTestUtils::CreateTextDictForText(); + DartsDictPtr dartsDict = DartsDict::NewFromDict(*textDict.get()); + DictTestUtils::TestDict(dartsDict); + + // Serialization + string fileName = "dict.ocd"; + dartsDict->opencc::SerializableDict::SerializeToFile(fileName); + + // Deserialization + DartsDictPtr deserialized = SerializableDict::NewFromFile(fileName); + DictTestUtils::TestDict(deserialized); +} + +void TestDictGroup() { + { + const auto& dictGroup = DictTestUtils::CreateDictGroupForConversion(); + const auto& entry = dictGroup->Dict::MatchPrefix(utf8("Unknown")); + AssertTrue(entry.IsNull()); + + const auto& matches = dictGroup->Dict::MatchAllPrefixes(utf8("干燥")); + AssertEquals(2, matches.size()); + AssertEquals(utf8("乾燥"), matches.at(0)->GetDefault()); + AssertEquals(utf8("幹"), matches.at(1)->GetDefault()); + } + { + DictGroupPtr dictGroup(new DictGroup(list{ + DictTestUtils::CreateDictForPhrases(), + DictTestUtils::CreateTaiwanPhraseDict() + })); + { + const auto& entry = dictGroup->Dict::MatchPrefix(utf8("鼠标")); + AssertEquals(utf8("鼠標"), entry.Get()->GetDefault()); + } + { + const auto& entry = dictGroup->Dict::MatchPrefix(utf8("克罗地亚")); + AssertEquals(utf8("克羅埃西亞"), entry.Get()->GetDefault()); + } + { + const auto& matches = dictGroup->Dict::MatchAllPrefixes(utf8("鼠标")); + AssertEquals(1, matches.size()); + AssertEquals(utf8("鼠標"), matches[0]->GetDefault()); + } + } +} + +void TestSegmentation() { + auto dict = DictTestUtils::CreateDictGroupForConversion(); + auto 
segmentation = SegmentationPtr(new MaxMatchSegmentation(dict)); + const auto& segments = segmentation->Segment(utf8("太后的头发干燥")); + AssertEquals(4, segments->Length()); + AssertEquals(utf8("太后"), string(segments->At(0))); + AssertEquals(utf8("的"), string(segments->At(1))); + AssertEquals(utf8("头发"), string(segments->At(2))); + AssertEquals(utf8("干燥"), string(segments->At(3))); +} + +void TestConversion() { + auto dict = DictTestUtils::CreateDictGroupForConversion(); + auto conversion = ConversionPtr(new Conversion(dict)); + const string& input = utf8("太后的头发干燥"); + const string& expected = utf8("太后的頭髮乾燥"); + { + string converted = conversion->Convert(input); + AssertEquals(expected, converted); + } + { + string converted = conversion->Convert(input.c_str()); + AssertEquals(expected, converted); + } +} + +void TestConversionChain() { + // Dict + auto dict = DictTestUtils::CreateDictGroupForConversion(); + auto conversion = ConversionPtr(new Conversion(dict)); + // Variants + auto dictVariants = DictTestUtils::CreateDictForTaiwanVariants(); + auto conversionVariants = ConversionPtr(new Conversion(dictVariants)); + list conversions; + conversions.push_back(conversion); + conversions.push_back(conversionVariants); + auto conversionChain = ConversionChainPtr(new ConversionChain(conversions)); + auto converted = conversionChain->Convert( + SegmentsPtr(new Segments{utf8("里面")})); + SegmentsAssertEquals(SegmentsPtr(new Segments{utf8("裡面")}), converted); +} + +const string CONFIG_TEST_PATH = "config_test/config_test.json"; + +void TestConfigConverter() { + Config config; + auto converter = config.NewFromFile(CONFIG_TEST_PATH); + const string& input = utf8("燕燕于飞差池其羽之子于归远送于野"); + const string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); + { + string converted = converter->Convert(input); + AssertEquals(expected, converted); + } + { + char output[1024]; + size_t length = converter->Convert(input.c_str(), output); + AssertEquals(expected.length(), length); + AssertEquals(expected, 
output); + } + { + string path = "/opencc/no/such/file/or/directory"; + try { + auto converter = config.NewFromFile(path); + } catch (FileNotFound& e) { + AssertEquals(path + " not found or not accessible.", e.what()); + } + } +} + +void TestMultithreading() { + auto routine = [](std::string name) { + SimpleConverter converter(name); + string converted = converter.Convert(utf8("燕燕于飞差池其羽之子于归远送于野")); + AssertEquals(utf8("燕燕于飛差池其羽之子于歸遠送於野"), converted); + }; + std::thread thread1(routine, CONFIG_TEST_PATH); + std::thread thread2(routine, CONFIG_TEST_PATH); + routine(CONFIG_TEST_PATH); + thread1.join(); + thread2.join(); +} + +void TestCInterface() { + const string& text = utf8("燕燕于飞差池其羽之子于归远送于野"); + const string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); + { + opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); + AssertEquals(expected, converted); + opencc_convert_utf8_free(converted); + AssertEquals(0, opencc_close(od)); + } + { + char output[1024]; + opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + size_t length = opencc_convert_utf8_to_buffer(od, text.c_str(), (size_t)-1, + output); + AssertEquals(expected.length(), length); + AssertEquals(expected, output); + AssertEquals(0, opencc_close(od)); + } + { + string path = "/opencc/no/such/file/or/directory"; + opencc_t od = opencc_open(path.c_str()); + AssertEquals(reinterpret_cast(-1), od); + AssertEquals(path + " not found or not accessible.", opencc_error()); + } +} + +int main(int argc, const char* argv[]) { + TestUtils::RunTest("TestTextDict", TestTextDict); + TestUtils::RunTest("TestBinaryDict", TestBinaryDict); + TestUtils::RunTest("TestDartsDict", TestDartsDict); + TestUtils::RunTest("TestDictGroup", TestDictGroup); + TestUtils::RunTest("TestSegmentation", TestSegmentation); + TestUtils::RunTest("TestConversion", TestConversion); + TestUtils::RunTest("TestConversionChain", TestConversionChain); + TestUtils::RunTest("TestConfigConverter", 
TestConfigConverter); + TestUtils::RunTest("TestMultithreading", TestMultithreading); + TestUtils::RunTest("TestCInterface", TestCInterface); +} diff --git a/test/config_test/config_test.json b/test/config_test/config_test.json new file mode 100644 index 0000000..eef6c2c --- /dev/null +++ b/test/config_test/config_test.json @@ -0,0 +1,22 @@ +{ + "name": "Configuration Test", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "text", + "file": "config_test_phrases.txt" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "text", + "file": "config_test_phrases.txt" + }, { + "type": "text", + "file": "config_test_characters.txt" + }] + } + }] +} diff --git a/test/config_test/config_test_characters.txt b/test/config_test/config_test_characters.txt new file mode 100644 index 0000000..9d573dc --- /dev/null +++ b/test/config_test/config_test_characters.txt @@ -0,0 +1,2 @@ +于 於 +远 遠 diff --git a/test/config_test/config_test_phrases.txt b/test/config_test/config_test_phrases.txt new file mode 100644 index 0000000..2a73156 --- /dev/null +++ b/test/config_test/config_test_phrases.txt @@ -0,0 +1,2 @@ +燕燕于飞 燕燕于飛 +之子于归 之子于歸 diff --git a/test/testcases/hk2s.ans b/test/testcases/hk2s.ans new file mode 100644 index 0000000..bdfe917 --- /dev/null +++ b/test/testcases/hk2s.ans @@ -0,0 +1,3 @@ +虚伪叹息 +潮湿灶台 +沙河涌汹涌的波浪 \ No newline at end of file diff --git a/test/testcases/hk2s.in b/test/testcases/hk2s.in new file mode 100644 index 0000000..4ea654e --- /dev/null +++ b/test/testcases/hk2s.in @@ -0,0 +1,3 @@ +虛偽歎息 +潮濕灶台 +沙河涌洶湧的波浪 \ No newline at end of file diff --git a/test/testcases/mix2zhs.ans b/test/testcases/mix2zhs.ans deleted file mode 100644 index b42d207..0000000 --- a/test/testcases/mix2zhs.ans +++ /dev/null @@ -1,2 +0,0 @@ -为什么简繁混杂是一个难题? 
-马拉松是一种有益身心的活动。 diff --git a/test/testcases/mix2zhs.in b/test/testcases/mix2zhs.in deleted file mode 100644 index 031c973..0000000 --- a/test/testcases/mix2zhs.in +++ /dev/null @@ -1,2 +0,0 @@ -爲什么簡繁混杂是一個難題? -馬拉松是一种有益身心的活动。 diff --git a/test/testcases/mix2zht.ans b/test/testcases/mix2zht.ans deleted file mode 100644 index ed5a42b..0000000 --- a/test/testcases/mix2zht.ans +++ /dev/null @@ -1,2 +0,0 @@ -爲什麼簡繁混雜是一個難題? -馬拉松是一種有益身心的活動。 diff --git a/test/testcases/mix2zht.in b/test/testcases/mix2zht.in deleted file mode 100644 index 031c973..0000000 --- a/test/testcases/mix2zht.in +++ /dev/null @@ -1,2 +0,0 @@ -爲什么簡繁混杂是一個難題? -馬拉松是一种有益身心的活动。 diff --git a/test/testcases/s2hk.ans b/test/testcases/s2hk.ans new file mode 100644 index 0000000..4ea654e --- /dev/null +++ b/test/testcases/s2hk.ans @@ -0,0 +1,3 @@ +虛偽歎息 +潮濕灶台 +沙河涌洶湧的波浪 \ No newline at end of file diff --git a/test/testcases/s2hk.in b/test/testcases/s2hk.in new file mode 100644 index 0000000..bdfe917 --- /dev/null +++ b/test/testcases/s2hk.in @@ -0,0 +1,3 @@ +虚伪叹息 +潮湿灶台 +沙河涌汹涌的波浪 \ No newline at end of file diff --git a/test/testcases/zhs2zht.ans b/test/testcases/s2t.ans similarity index 87% rename from test/testcases/zhs2zht.ans rename to test/testcases/s2t.ans index 0288fe8..515de37 100644 --- a/test/testcases/zhs2zht.ans +++ b/test/testcases/s2t.ans @@ -4,3 +4,6 @@ 燕燕于飛,差池其羽。之子于歸,遠送於野。 請成相,世之殃,愚闇愚闇墮賢良。人主無賢,如瞽無相何倀倀!請布基,慎聖人,愚而自專事不治。主忌苟勝,羣臣莫諫必逢災。 曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。 +新的理論被發現了。 +鮎魚和鮎魚是一種生物。 +金胄不是金色的甲冑。 \ No newline at end of file diff --git a/test/testcases/zhs2zht.in b/test/testcases/s2t.in similarity index 87% rename from test/testcases/zhs2zht.in rename to test/testcases/s2t.in index dc011f3..8e881b7 100644 --- a/test/testcases/zhs2zht.in +++ b/test/testcases/s2t.in @@ -4,3 +4,6 @@ 燕燕于飞,差池其羽。之子于归,远送于野。 请成相,世之殃,愚暗愚暗堕贤良。人主无贤,如瞽无相何伥伥!请布基,慎圣人,愚而自专事不治。主忌苟胜,群臣莫谏必逢灾。 
曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。 +新的理论被发现了。 +鲶鱼和鲇鱼是一种生物。 +金胄不是金色的甲胄。 \ No newline at end of file diff --git a/test/testcases/s2tw.ans b/test/testcases/s2tw.ans new file mode 100644 index 0000000..3ce62d5 --- /dev/null +++ b/test/testcases/s2tw.ans @@ -0,0 +1,2 @@ +著裝汙染虛偽發洩稜柱群眾裡面 +鯰魚和鯰魚是一種生物。 \ No newline at end of file diff --git a/test/testcases/s2tw.in b/test/testcases/s2tw.in new file mode 100644 index 0000000..5f9967b --- /dev/null +++ b/test/testcases/s2tw.in @@ -0,0 +1,2 @@ +着装污染虚伪发泄棱柱群众里面 +鲶鱼和鲇鱼是一种生物。 \ No newline at end of file diff --git a/test/testcases/zhs2zhtw_vp.ans b/test/testcases/s2twp.ans similarity index 100% rename from test/testcases/zhs2zhtw_vp.ans rename to test/testcases/s2twp.ans diff --git a/test/testcases/zhs2zhtw_p.in b/test/testcases/s2twp.in similarity index 100% rename from test/testcases/zhs2zhtw_p.in rename to test/testcases/s2twp.in diff --git a/test/testcases/zht2zhs.ans b/test/testcases/t2s.ans similarity index 99% rename from test/testcases/zht2zhs.ans rename to test/testcases/t2s.ans index f7cc7c5..5185049 100644 --- a/test/testcases/zht2zhs.ans +++ b/test/testcases/t2s.ans @@ -1 +1 @@ -曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。 +曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。 \ No newline at end of file diff --git a/test/testcases/zht2zhs.in b/test/testcases/t2s.in similarity index 99% rename from test/testcases/zht2zhs.in rename to test/testcases/t2s.in index fd10e9a..86de44a 100644 --- a/test/testcases/zht2zhs.in +++ b/test/testcases/t2s.in @@ -1 +1 @@ -曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。 +曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。 \ No newline at end of file diff --git a/test/testcases/tw2s.ans 
b/test/testcases/tw2s.ans new file mode 100644 index 0000000..b010f7b --- /dev/null +++ b/test/testcases/tw2s.ans @@ -0,0 +1 @@ +着装著作污染虚伪发泄棱柱群众里面 \ No newline at end of file diff --git a/test/testcases/tw2s.in b/test/testcases/tw2s.in new file mode 100644 index 0000000..5595eb5 --- /dev/null +++ b/test/testcases/tw2s.in @@ -0,0 +1 @@ +著裝著作汙染虛偽發洩稜柱群眾裡面 \ No newline at end of file diff --git a/test/testcases/zhs2zhtw_vp.in b/test/testcases/tw2sp.ans similarity index 100% rename from test/testcases/zhs2zhtw_vp.in rename to test/testcases/tw2sp.ans diff --git a/test/testcases/zhtw2zhcn_s.in b/test/testcases/tw2sp.in similarity index 100% rename from test/testcases/zhtw2zhcn_s.in rename to test/testcases/tw2sp.in diff --git a/test/testcases/zhs2zhtw_p.ans b/test/testcases/zhs2zhtw_p.ans deleted file mode 100644 index f537c80..0000000 --- a/test/testcases/zhs2zhtw_p.ans +++ /dev/null @@ -1,3 +0,0 @@ -滑鼠裏面的矽二極體壞了,導致游標解析度降低。 -我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。 -爲什麼你在牀裏面睡着? \ No newline at end of file diff --git a/test/testcases/zhtw2zhcn_s.ans b/test/testcases/zhtw2zhcn_s.ans deleted file mode 100644 index 46668e2..0000000 --- a/test/testcases/zhtw2zhcn_s.ans +++ /dev/null @@ -1,3 +0,0 @@ -鼠标里面的硅二极管坏了,导致光标分辨率降低。 -我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。 -为什么你在床里面睡着? \ No newline at end of file diff --git a/test/testcases/zhtw2zhcn_t.ans b/test/testcases/zhtw2zhcn_t.ans deleted file mode 100644 index 9703b41..0000000 --- a/test/testcases/zhtw2zhcn_t.ans +++ /dev/null @@ -1,3 +0,0 @@ -鼠標裏面的硅二極管壞了,導致光標分辨率降低。 -我們在老撾的服務器的硬盤需要使用互聯網算法軟件解決異步的問題。 -爲什麼你在牀裏面睡着? \ No newline at end of file diff --git a/test/testcases/zhtw2zhcn_t.in b/test/testcases/zhtw2zhcn_t.in deleted file mode 100644 index 023fc12..0000000 --- a/test/testcases/zhtw2zhcn_t.in +++ /dev/null @@ -1,3 +0,0 @@ -滑鼠裡面的矽二極體壞了,導致游標解析度降低。 -我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。 -為什麼你在床裡面睡著? \ No newline at end of file -- 2.30.2